1414# limitations under the License.
1515"""
1616
17- from typing import Any , List , Optional
17+ import inspect
18+ from typing import Any , Dict , List , Optional
1819
1920from fastdeploy .entrypoints .openai .usage_calculator import count_tokens
2021from fastdeploy .input .tokenzier_client import AsyncTokenizerClient , ImageDecodeRequest
@@ -34,19 +35,22 @@ def __init__(
3435 data_processor ,
3536 enable_mm_output : Optional [bool ] = False ,
3637 eoi_token_id : Optional [int ] = 101032 ,
38+ eoa_token_id : Optional [int ] = 2048 ,
3739 eos_token_id : Optional [int ] = 2 ,
3840 decoder_base_url : Optional [str ] = None ,
3941 ):
4042 self .data_processor = data_processor
4143 self .enable_mm_output = enable_mm_output
4244 self .eoi_token_id = eoi_token_id
45+ self .eoa_token_id = eoa_token_id
4346 self .eos_token_id = eos_token_id
4447 if decoder_base_url is not None :
4548 self .decoder_client = AsyncTokenizerClient (base_url = decoder_base_url )
4649 else :
4750 self .decoder_client = None
4851 self ._mm_buffer : List [Any ] = [] # Buffer for accumulating image token_ids
4952 self ._end_image_code_request_output : Optional [Any ] = None
53+ self ._audio_buffer : Dict [Any ] = {}
5054 self ._multipart_buffer = []
5155
5256 def enable_multimodal_content (self ):
@@ -80,16 +84,54 @@ async def process_response_chat(self, request_outputs, stream, enable_thinking,
8084 for request_output in request_outputs :
8185 api_server_logger .debug (f"request_output { request_output } " )
8286 if not self .enable_mm_output :
83- yield self .data_processor .process_response_dict (
84- response_dict = request_output ,
85- stream = stream ,
86- enable_thinking = enable_thinking ,
87- include_stop_str_in_output = include_stop_str_in_output ,
88- )
87+ outputs = request_output .get ("outputs" , None )
88+ token_ids = outputs .get ("token_ids" , None ) if outputs is not None else None
89+ req_id = request_output .get ("request_id" , None )
90+ if outputs is not None and token_ids is not None and req_id is not None :
91+ decode_type = request_output ["outputs" ].get ("decode_type" , 0 ) or 0
92+ if decode_type == 0 : # text
93+ tts = req_id in self ._audio_buffer
94+ if token_ids [- 1 ] == self .eos_token_id :
95+ all_audio_tokens = self ._audio_buffer .pop (req_id , [])
96+ else :
97+ all_audio_tokens = None
98+ if inspect .iscoroutinefunction (self .data_processor .process_response_dict ):
99+ response = await self .data_processor .process_response_dict (
100+ response_dict = request_output ,
101+ stream = stream ,
102+ enable_thinking = enable_thinking ,
103+ include_stop_str_in_output = include_stop_str_in_output ,
104+ audio_tokens = all_audio_tokens ,
105+ tts = tts ,
106+ )
107+ else :
108+ response = self .data_processor .process_response_dict (
109+ response_dict = request_output ,
110+ stream = stream ,
111+ enable_thinking = enable_thinking ,
112+ include_stop_str_in_output = include_stop_str_in_output ,
113+ audio_tokens = all_audio_tokens ,
114+ tts = tts ,
115+ )
116+ yield response
117+ elif decode_type == 2 : # audio
118+ if self .eoa_token_id is not None and self .eoa_token_id in token_ids :
119+ continue
120+ if req_id in self ._audio_buffer :
121+ self ._audio_buffer [req_id ].append (token_ids )
122+ else :
123+ self ._audio_buffer [req_id ] = [token_ids ]
124+ else :
125+ yield self .data_processor .process_response_dict (
126+ response_dict = request_output ,
127+ stream = stream ,
128+ enable_thinking = enable_thinking ,
129+ include_stop_str_in_output = include_stop_str_in_output ,
130+ )
89131 elif stream :
90132 decode_type = request_output ["outputs" ].get ("decode_type" , 0 )
91133 token_ids = request_output ["outputs" ]["token_ids" ]
92- if decode_type == 0 :
134+ if decode_type == 0 : # text
93135 if self .eoi_token_id and self .eoi_token_id in token_ids :
94136 if self ._mm_buffer :
95137 all_tokens = self ._mm_buffer
@@ -118,7 +160,7 @@ async def process_response_chat(self, request_outputs, stream, enable_thinking,
118160 request_output ["outputs" ]["multipart" ] = [text ]
119161 yield request_output
120162
121- elif decode_type == 1 :
163+ elif decode_type == 1 : # image
122164 self ._mm_buffer .append (token_ids )
123165 self ._end_image_code_request_output = request_output
124166 else :
0 commit comments