diff --git a/lmdeploy/serve/openai/reasoning_parser/__init__.py b/lmdeploy/serve/openai/reasoning_parser/__init__.py
index 09d621a252..b1e470d101 100644
--- a/lmdeploy/serve/openai/reasoning_parser/__init__.py
+++ b/lmdeploy/serve/openai/reasoning_parser/__init__.py
@@ -1,6 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
+from .gpt_oss_reasoning_parser import GPTOssReasoningParser
 from .qwen_qwq_reasoning_parser import QwenQwQReasoningParser
 from .reasoning_parser import ReasoningParser, ReasoningParserManager
 
-__all__ = ['ReasoningParser', 'ReasoningParserManager', 'DeepSeekR1ReasoningParser', 'QwenQwQReasoningParser']
+__all__ = [
+    'ReasoningParser', 'ReasoningParserManager', 'DeepSeekR1ReasoningParser', 'QwenQwQReasoningParser',
+    'GPTOssReasoningParser'
+]
diff --git a/lmdeploy/serve/openai/reasoning_parser/gpt_oss_reasoning_parser.py b/lmdeploy/serve/openai/reasoning_parser/gpt_oss_reasoning_parser.py
new file mode 100644
index 0000000000..229923abe0
--- /dev/null
+++ b/lmdeploy/serve/openai/reasoning_parser/gpt_oss_reasoning_parser.py
@@ -0,0 +1,184 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# Reasoning parser for GPT-OSS style channels.
+# Recognizes:
+#   <|channel|>analysis<|message|> ... <|end|>
+#   <|start|>assistant<|channel|>final<|message|> ...
+#   <|channel|>final<|message|> ...
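+#
+# For example, a completion like the following (hypothetical text)
+#   <|channel|>analysis<|message|>Let me think.<|end|><|start|>assistant<|channel|>final<|message|>Hello!
+# parses to reasoning_content='Let me think.' and content='Hello!'.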
+import re
+from typing import Optional, Sequence, Tuple, Union
+
+from lmdeploy.serve.openai.protocol import ChatCompletionRequest, DeltaMessage
+
+from .reasoning_parser import ReasoningParser, ReasoningParserManager
+
+
+@ReasoningParserManager.register_module(name='gpt-oss')
+class GPTOssReasoningParser(ReasoningParser):
+    """Parser that splits GPT-OSS channel tags into reasoning and final content.
+
+    Streaming behavior:
+    - Text between the analysis-start tag and <|end|> -> reasoning_content
+    - Text after either final-start marker -> content
+    Non-streaming behavior:
+    - Extract both channels from the full string by splitting on the tag strings.
+    """
+
+    ANALYSIS = '<|channel|>analysis<|message|>'
+    FINAL1 = '<|start|>assistant<|channel|>final<|message|>'
+    FINAL2 = '<|channel|>final<|message|>'
+    END = '<|end|>'
+
+    def __init__(self, tokenizer: object):
+        super().__init__(tokenizer)
+        if not self.model_tokenizer:
+            raise ValueError('The model tokenizer must be passed to the ReasoningParser constructor.')
+
+        # Instance aliases of the class-level tag strings
+        self.analysis_start = self.ANALYSIS
+        self.final_start_with_assistant = self.FINAL1
+        self.final_start_plain = self.FINAL2
+        self.end_tag = self.END
+
+        # Tag-span regexes, kept as a regex-based alternative to the
+        # split-based extraction below; the optional '<|start|>assistant'
+        # group lets one pattern match both final markers.
+        self._re_analysis = re.compile(r'<\|channel\|>analysis<\|message\|>(.*?)(?=(?:<\|end\|>|$))', re.S)
+        self._re_final = re.compile(r'(?:<\|start\|>assistant)?<\|channel\|>final<\|message\|>(.*?)(?=(?:<\|end\|>|$))', re.S)
+
+        # Token ids of the tag strings; each is None when the tag is not a
+        # single token in the vocab, so the streaming logic below stays
+        # string-based and does not rely on them.
+        self.analysis_start_id = self.vocab.get(self.analysis_start)
+        self.final_start_with_assistant_id = self.vocab.get(self.final_start_with_assistant)
+        self.final_start_plain_id = self.vocab.get(self.final_start_plain)
+        self.end_id = self.vocab.get(self.end_tag)
+
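+    # A minimal sketch of _strip_tags' intended effect (hypothetical inputs):
+    #   _strip_tags('<|start|>assistant')  -> ''    (pure tag fragment)
+    #   _strip_tags('final')               -> ''    (keyword-only fragment)
+    #   _strip_tags('the final answer')    -> 'the final answer'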
+    def _strip_tags(self, text: Optional[str]) -> Optional[str]:
+        if text is None:
+            return None
+        # Remove any complete tag-like tokens
+        text = re.sub(r'<\|[^>]*?\|>', '', text)
+        # Drop the channel keywords only when they make up the entire remaining
+        # fragment (a tag split across deltas); deleting these words everywhere
+        # would corrupt legitimate occurrences of 'analysis'/'final'/'assistant'.
+        text = re.sub(r'^\s*(?:analysis|final|assistant)+\s*$', '', text)
+        return text
+
+    def extract_reasoning_content_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        **kwargs,
+    ) -> Union[DeltaMessage, None]:
+        # Purely string-based parsing: look for channel tags in the
+        # accumulated previous text and in the current delta.
+        prev = previous_text or ''
+        delta = delta_text or ''
+
+        def analysis_open(t: str) -> bool:
+            i = t.rfind(self.ANALYSIS)
+            if i == -1:
+                return False
+            j = t.find(self.END, i + len(self.ANALYSIS))
+            return j == -1  # no END after the last ANALYSIS
+
+        def final_open(t: str) -> bool:
+            i1 = t.rfind(self.FINAL1)
+            i2 = t.rfind(self.FINAL2)
+            i = max(i1, i2)
+            if i == -1:
+                return False
+            return t.find(self.END, i) == -1  # no END after the final marker
+
+        def content_after(remainder: str) -> Optional[str]:
+            # Keep only text after a final marker, if one is present, and
+            # drop anything after a closing <|end|>.
+            if self.FINAL1 in remainder:
+                content = remainder.split(self.FINAL1, 1)[1]
+            elif self.FINAL2 in remainder:
+                content = remainder.split(self.FINAL2, 1)[1]
+            else:
+                content = remainder or None
+            if content and self.END in content:
+                content = content.split(self.END, 1)[0]
+            return content
+
+        # Case A: analysis is already open from previous_text
+        if analysis_open(prev):
+            if self.END in delta:
+                cut = delta.find(self.END)
+                reasoning_part = delta[:cut]
+                content_part = content_after(delta[cut + len(self.END):])
+                return DeltaMessage(reasoning_content=self._strip_tags(reasoning_part) or None,
+                                    content=self._strip_tags(content_part) or None)
+            return DeltaMessage(reasoning_content=self._strip_tags(delta) or None)
+
+        # Case B: analysis starts within this delta
+        if self.ANALYSIS in delta:
+            aft = delta.split(self.ANALYSIS, 1)[1]
+            if self.END in aft:
+                cut = aft.find(self.END)
+                reasoning_part = aft[:cut]
+                content_part = content_after(aft[cut + len(self.END):])
+                return DeltaMessage(reasoning_content=self._strip_tags(reasoning_part) or None,
+                                    content=self._strip_tags(content_part) or None)
+            return DeltaMessage(reasoning_content=self._strip_tags(aft) or None)
+
+        # Case C: final has started (previously or within this delta)
+        if final_open(prev) or self.FINAL1 in delta or self.FINAL2 in delta:
+            return DeltaMessage(content=self._strip_tags(content_after(delta)) or None)
+
+        # Reaching here with a final marker in prev means the final channel was
+        # already closed by <|end|>; suppress trailing text rather than
+        # misclassifying it as reasoning.
+        if self.FINAL2 in prev:  # FINAL2 is a substring of FINAL1
+            return None
+
+        # Default: treat text as reasoning until channel markers appear
+        return DeltaMessage(reasoning_content=self._strip_tags(delta) or None)
+
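+    # Hypothetical streaming trace (delta on the left, emitted fields right):
+    #   '<|channel|>analysis<|message|>' -> reasoning_content=None (tag only)
+    #   'thinking...'                    -> reasoning_content='thinking...'
+    #   '<|end|><|start|>assistant<|channel|>final<|message|>Hi' -> content='Hi'
+    #   ' there'                         -> content=' there'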
+    def extract_reasoning_content(self, model_output: str, request: ChatCompletionRequest,
+                                  **kwargs) -> Tuple[Optional[str], Optional[str]]:
+        text = model_output or ''
+        reasoning = None
+        final = None
+        # Extract analysis between ANALYSIS and END; tolerate a missing END
+        # (e.g. generation stopped mid-analysis) by also cutting at a final marker.
+        if self.ANALYSIS in text:
+            a = text.split(self.ANALYSIS, 1)[1]
+            reasoning = a.split(self.END, 1)[0]
+            reasoning = reasoning.split(self.FINAL1, 1)[0].split(self.FINAL2, 1)[0]
+        # Extract final content after FINAL1/FINAL2
+        if self.FINAL1 in text:
+            final = text.split(self.FINAL1, 1)[1]
+        elif self.FINAL2 in text:
+            final = text.split(self.FINAL2, 1)[1]
+        if final is not None:
+            # Drop a trailing END and anything after it
+            final = final.split(self.END, 1)[0]
+            final = final or None
+        # With no tags at all, treat the whole output as final content
+        if reasoning is None and final is None and text:
+            final = text
+        return self._strip_tags(reasoning), self._strip_tags(final)
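+    # Hypothetical full-output example:
+    #   out = ('<|channel|>analysis<|message|>Let me think.<|end|>'
+    #          '<|start|>assistant<|channel|>final<|message|>Hello!<|end|>')
+    #   extract_reasoning_content(out, request) -> ('Let me think.', 'Hello!')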