Skip to content

Commit ad482bf

Browse files
committed
alternative suggested by lite
1 parent 3ddfbd8 commit ad482bf

File tree

2 files changed

+16
-9
lines changed

2 files changed

+16
-9
lines changed

serve/mlc_serve/engine/async_connector.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,10 @@ async def _add_request(self, request: Request) -> ResultQueue:
150150
queue = asyncio.Queue()
151151
self.result_queues[request.request_id] = queue
152152

153-
await asyncio.to_thread(self.engine.add, [request])
153+
try:
154+
await asyncio.to_thread(self.engine.add, [request])
155+
except TextGenerationError as e:
156+
raise asyncio.CancelledError(e)
154157

155158
return queue
156159

serve/mlc_serve/engine/staging_engine.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
ScopedInferenceEngine,
2222
SequenceOutput,
2323
)
24+
from .error import TextGenerationError
2425
from .engine_common import get_new_request_state, prepare_output
2526
from .model_module import ModelModule, TokenizerModule
26-
from ..model.base import get_model_artifact_config
2727
from .staging_engine_worker import (
2828
AddRequestsCommand,
2929
CancelRequestCommand,
@@ -119,13 +119,17 @@ def add(self, requests: list[Request]):
119119
assert isinstance(req.stopping_criteria.stop_sequences, list)
120120

121121
# If building the request state fails (e.g. the request violates the tokenization), log a warning and raise TextGenerationError instead of skipping it.
122-
state = get_new_request_state(
123-
req,
124-
self.conversation_template,
125-
self.tokenizer,
126-
self.model_artifact_config.vocab_size,
127-
)
128-
new_request_states.append(state)
122+
try:
123+
state = get_new_request_state(
124+
req,
125+
self.conversation_template,
126+
self.tokenizer,
127+
self.model_artifact_config.vocab_size,
128+
)
129+
new_request_states.append(state)
130+
except Exception as e:
131+
LOG.warn("Failed to add a request", request_id=req.request_id)
132+
raise TextGenerationError(str(e))
129133

130134
self.command_queue.put(AddRequestsCommand(request_states=new_request_states))
131135

0 commit comments

Comments
 (0)