Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 54 additions & 5 deletions lmdeploy/serve/async_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,57 @@
logger = get_logger('lmdeploy')


def _merge_message_content(msg: Dict) -> Dict:
"""Merge multimodal content blocks and ensure content field exists.

This function normalizes message content to match vLLM's behavior:
1. Missing content field -> add content='' (empty string)
2. None content -> convert to content='' (empty string)
3. String content -> return as-is
4. List content (multimodal) -> merge all text blocks with newline separator

Args:
msg: A message dict with 'role' and optionally 'content' field

Returns:
A message dict with 'content' field guaranteed to exist

Note:
This implementation is based on vLLM's content processing logic.
vLLM uses "\n".join() to merge multiple text blocks from multimodal content.

References:
- vLLM content normalization:
https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/chat_utils.py
See _parse_chat_message_content() and _parse_chat_message_content_parts()
- vLLM text merging logic:
text_prompt = "\n".join(texts)
"""
# If content is missing or None, convert to empty string (matches vLLM behavior)
# This prevents Jinja2 template errors when rendering chat templates
if 'content' not in msg or msg['content'] is None:
result = dict(msg)
result['content'] = ''
return result

# If content is already a string, return as-is
if isinstance(msg['content'], str):
return msg

# If content is a list, merge all text blocks into a single string
# This matches vLLM's behavior: text_prompt = "\n".join(texts)
content_parts = []
for block in msg['content']:
if isinstance(block, dict) and block.get('type') == 'text':
content_parts.append(block.get('text', ''))
merged_content = '\n'.join(content_parts)

# Preserve all other fields in the message (e.g., tool_calls)
result = dict(msg)
result['content'] = merged_content
return result


@dataclasses.dataclass
class GenOut:
"""Pack all response information together."""
Expand Down Expand Up @@ -609,11 +660,9 @@ async def _get_prompt_input(self,
# Change multimodal data to openai text messages, i.e.,
# [{'role': 'user', 'content': [{'type': 'text', 'text': 'hi'}]}] ->
# [{'role': 'user', 'content': 'hi']
if isinstance(prompt, list) and any(isinstance(msg['content'], list) for msg in prompt):
prompt = [
msg if isinstance(msg['content'], str) else dict(role=msg['role'], content=msg['content'][0]['text'])
for msg in prompt
]
# Also ensure all messages have 'content' field (set to None if missing, e.g., assistant with tool_calls)
if isinstance(prompt, list):
prompt = [_merge_message_content(msg) for msg in prompt]
if do_preprocess:
# use adapter's chat template if possible
chat_template = self.chat_template
Expand Down
Loading