Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
7b13ea4
Fix Qwen tool call OpenAI translation
samuelfaj Apr 25, 2026
5c9b4e8
Preserve tool schemas after streamed content
samuelfaj Apr 25, 2026
5594261
Coerce generic tool arguments from schema
samuelfaj Apr 25, 2026
7fa174d
Handle additional OpenCode tool call formats
samuelfaj Apr 25, 2026
ff6f247
Preserve code brackets near partial tool markers
samuelfaj Apr 25, 2026
0b64dcf
Fix PR check failures
samuelfaj May 4, 2026
8b42dc6
Add serve TUI monitor
samuelfaj May 4, 2026
4d5a3b7
Fix TUI PR CI failures
samuelfaj May 4, 2026
b2b98b2
Add TUI request throughput metrics
samuelfaj May 4, 2026
7f3a1ee
Enhance serve TUI request metrics
samuelfaj May 4, 2026
a1a188e
Improve Hermes tool-call recovery
samuelfaj May 4, 2026
3841801
Merge remote-tracking branch 'origin/add-serve-tui' into new-main
samuelfaj May 4, 2026
bbc6136
Merge remote-tracking branch 'origin/hermes-pr204-tool-recovery' into…
samuelfaj May 4, 2026
bfeb2f2
Add JANG model loader integration
samuelfaj May 5, 2026
907d343
Merge pull request #1 from samuelfaj/add-jangtq-loader
samuelfaj May 5, 2026
4ce7046
Patch DeepSeek V4 JANGTQ tokenizer loading
samuelfaj May 5, 2026
1746f84
Apply JANG tokenizer metadata
samuelfaj May 5, 2026
7ac0c59
Patch JANGTQ RoPE batching offset
samuelfaj May 5, 2026
197243b
Use direct generation for DeepSeek V4 JANGTQ
samuelfaj May 5, 2026
1ad7852
Wait for server readiness before TUI
samuelfaj May 5, 2026
9fd2f5a
Stream direct JANGTQ generation
samuelfaj May 5, 2026
0ee615b
Track direct JANGTQ prefill progress
samuelfaj May 5, 2026
eebf7dd
Cap default direct JANG generation
samuelfaj May 5, 2026
63eabbb
Sanitize direct JANG tool prompts
samuelfaj May 5, 2026
05c1f30
Merge remote-tracking branch 'upstream/main'
samuelfaj May 5, 2026
ae6a2af
Restore direct JANG tool execution
samuelfaj May 5, 2026
9b0bb10
Improve direct JANG tool artifact fallback
samuelfaj May 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ vision = [
embeddings = [
"mlx-embeddings>=0.0.5",
]
# JANG/JANGTQ model support via jang-tools.
jang = [
"jang[mlx]>=2.1.5; python_version >= '3.11'",
]
# Gradio chat UI
chat = [
"gradio>=4.0.0",
Expand Down
13 changes: 13 additions & 0 deletions tests/test_chat_tool_retry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from vllm_mlx.routes.chat import _looks_like_deferred_tool_use


def test_deferred_tool_use_detects_intent_text():
    """Text that only announces an upcoming file write must be flagged."""
    assert _looks_like_deferred_tool_use("Let me write the files individually.")


def test_deferred_tool_use_detects_raw_write_file_tail():
    """A dangling JSON tail ending in a ``path`` argument must be flagged."""
    assert _looks_like_deferred_tool_use('", "path": "/tmp/tsconfig.json"}')


def test_deferred_tool_use_ignores_plain_answer():
    """An ordinary descriptive sentence must not be flagged."""
    assert not _looks_like_deferred_tool_use("The API exposes users and products.")
41 changes: 41 additions & 0 deletions tests/test_cli_tui_ready.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import json
import urllib.error

from vllm_mlx.cli import _wait_for_server_ready


class _FakeResponse:
    """Minimal stand-in for the object returned by ``urllib.request.urlopen``.

    It supports only what this test module uses: the context-manager
    protocol and ``read()``, which returns the payload encoded as JSON.
    """

    def __init__(self, payload):
        """Store ``payload``, the JSON-serialisable object ``read()`` returns."""
        self._payload = payload

    def __enter__(self):
        """Return the response itself so ``with ... as resp`` binds this stub."""
        return self

    def __exit__(self, *exc):
        """Do nothing on exit; returning ``None`` lets exceptions propagate."""
        return None

    def read(self):
        """Return the stored payload serialised to UTF-8 encoded JSON bytes."""
        return json.dumps(self._payload).encode("utf-8")


def test_wait_for_server_ready_waits_until_model_loaded(monkeypatch):
    """Check that readiness polling continues until ``model_loaded`` is true.

    ``urlopen`` is replaced by a stub that replays three scripted outcomes in
    order: a connection error, a healthy-but-not-loaded payload, and a
    healthy-and-loaded payload. ``time.sleep`` is replaced by a recorder so
    the test never actually waits.
    """
    # Scripted outcomes, consumed front to back by the stub below.
    responses = [
        urllib.error.URLError("not listening"),
        {"status": "healthy", "model_loaded": False},
        {"status": "healthy", "model_loaded": True},
    ]
    sleeps = []

    def fake_urlopen(url, timeout):
        # Exceptions in the script are raised; dicts are wrapped as responses.
        next_response = responses.pop(0)
        if isinstance(next_response, Exception):
            raise next_response
        return _FakeResponse(next_response)

    monkeypatch.setattr("urllib.request.urlopen", fake_urlopen)
    monkeypatch.setattr("time.sleep", lambda seconds: sleeps.append(seconds))

    _wait_for_server_ready("http://127.0.0.1:8010", timeout_s=5)

    # Two 0.25 s sleeps are expected, presumably one per not-ready poll.
    assert sleeps == [0.25, 0.25]
    # Every scripted outcome was consumed, so polling stopped at the ready one.
    assert responses == []
Loading
Loading