Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,13 @@ source activate.sh # sets DYLD_LIBRARY_PATH (generated by install.sh)
### 2. Run

```bash
# Baseline model
vllm-swift download mlx-community/Qwen3-4B-4bit
vllm-swift serve ~/models/Qwen3-4B-4bit --max-model-len 4096 # increase as needed, max 40960

# Qwen3.6 ConfigI (requires gated_delta kernels in mlx.metallib)
hf download thetom-ai/Qwen3.6-27B-ConfigI-MLX
vllm-swift serve thetom-ai/Qwen3.6-27B-ConfigI-MLX --max-model-len 4096
```

> Homebrew users don't need `activate.sh` — `vllm-swift serve` handles everything.
Expand Down Expand Up @@ -243,7 +248,13 @@ git clone https://github.com/TheTom/vllm-swift.git
cd vllm-swift
./scripts/install.sh # builds Swift, installs plugin, creates activate.sh
source activate.sh # sets DYLD_LIBRARY_PATH

# Baseline model
vllm serve ~/models/Qwen3-4B-4bit --max-model-len 4096

# Qwen3.6 ConfigI (requires gated_delta kernels in mlx.metallib)
hf download thetom-ai/Qwen3.6-27B-ConfigI-MLX
vllm serve thetom-ai/Qwen3.6-27B-ConfigI-MLX --max-model-len 4096
```

### Manual (full control)
Expand Down Expand Up @@ -292,6 +303,13 @@ cp swift/.build/arm64-apple-macosx/release/mlx.metallib \
$(dirname $(echo $DYLD_LIBRARY_PATH | cut -d: -f1))/
```

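If `./scripts/install.sh` reports that `gated_delta` kernels are missing (by default it stops the install when they are), your Xcode Metal compiler/toolchain may be incomplete or outdated. Download the Metal toolchain, rerun the installer, then confirm the kernels are present in the built metallib: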
```bash
xcodebuild -downloadComponent MetalToolchain
./scripts/install.sh
strings swift/.build/arm64-apple-macosx/release/mlx.metallib | grep gated_delta
```

### Download a model

```bash
Expand Down
156 changes: 156 additions & 0 deletions chat.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>vLLM Chat</title>
<style>
:root {
--bg: #f8fafc;
--card: #ffffff;
--border: #dbe3ec;
--text: #101419;
--muted: #5a6775;
--accent: #0c66d6;
}
html, body { margin: 0; padding: 0; background: var(--bg); color: var(--text); }
body { font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, sans-serif; }
.app { max-width: 980px; margin: 16px auto; padding: 0 12px; }
.panel { background: var(--card); border: 1px solid var(--border); border-radius: 10px; padding: 10px; }
.row { display: flex; gap: 8px; margin: 8px 0; }
.grow { flex: 1; }
label { font-size: 12px; color: var(--muted); display: block; margin-bottom: 4px; }
input, textarea, button {
font: inherit;
border: 1px solid var(--border);
border-radius: 8px;
padding: 8px 10px;
box-sizing: border-box;
}
input, textarea { width: 100%; background: #fff; color: var(--text); }
textarea { min-height: 90px; resize: vertical; }
button { background: #fff; cursor: pointer; }
button.primary { background: var(--accent); color: #fff; border-color: var(--accent); }
#log {
height: 58vh;
overflow: auto;
white-space: pre-wrap;
line-height: 1.35;
padding: 10px;
border: 1px solid var(--border);
border-radius: 8px;
background: #fff;
}
.hint { font-size: 12px; color: var(--muted); margin: 6px 0 0; }
</style>
</head>
<body>
<div class="app">
<div class="panel">
<div class="row">
<div class="grow">
<label for="baseUrl">Base URL</label>
<input id="baseUrl" value="http://127.0.0.1:8000" />
</div>
<div class="grow">
<label for="model">Model</label>
<input id="model" value="thetom-ai/Qwen3.6-27B-ConfigI-MLX" />
</div>
<div class="grow">
<label for="apiKey">API key (optional)</label>
<input id="apiKey" placeholder="sk-..." />
</div>
</div>

<div id="log"></div>

<div class="row">
<div class="grow">
<label for="prompt">Message</label>
<textarea id="prompt" placeholder="Type message, then click Send."></textarea>
<div class="hint">Shortcut: Ctrl+Enter (or Cmd+Enter)</div>
</div>
</div>

<div class="row">
<button id="clearBtn">Clear</button>
<button id="sendBtn" class="primary">Send</button>
</div>
</div>
</div>

<script>
// Handles to the page elements the script works with: the transcript pane,
// the message box, and the Send / Clear buttons.
const logEl = document.getElementById("log");
const promptEl = document.getElementById("prompt");
const sendBtn = document.getElementById("sendBtn");
const clearBtn = document.getElementById("clearBtn");

// Conversation history posted with every request. It starts with a single
// system message; user and assistant turns are pushed onto it as the chat runs.
let messages = [{ role: "system", content: "You are a helpful assistant." }];

// Add one "<role>: <text>" entry (followed by a blank line) to the transcript
// and keep the pane scrolled to the newest entry.
function append(role, text) {
  const entry = `${role}: ${text}\n\n`;
  logEl.textContent = logEl.textContent + entry;
  logEl.scrollTop = logEl.scrollHeight;
}

// Send the current prompt to the chat-completions endpoint and log the reply.
//
// Reads the base URL, model and optional API key from the form, adds the
// user's turn to the conversation, POSTs the whole history to
// `<baseUrl>/v1/chat/completions`, and appends the assistant's reply to both
// the transcript and the history. Failures are written to the transcript as
// "error" entries and the unanswered user turn is removed from the history.
// Does nothing when the prompt or model is empty, or while a request is
// already in flight. Returns a promise that settles when the request is done.
async function send() {
  // The Ctrl/Cmd+Enter shortcut does not go through the (disabled) button, so
  // overlapping requests have to be rejected here.
  if (sendBtn.disabled) return;

  const text = promptEl.value.trim();
  if (!text) return;

  const baseUrl = document.getElementById("baseUrl").value.trim().replace(/\/+$/, "");
  const model = document.getElementById("model").value.trim();
  const apiKey = document.getElementById("apiKey").value.trim();
  if (!model) return;

  // Clear replaces `messages` with a new array; keep a reference to the
  // conversation this turn belongs to so a late reply cannot leak into a
  // freshly cleared chat.
  const history = messages;
  const userTurn = { role: "user", content: text };

  promptEl.value = "";
  append("user", text);
  history.push(userTurn);

  sendBtn.disabled = true;
  sendBtn.textContent = "Sending...";
  let answered = false;
  try {
    const headers = { "Content-Type": "application/json" };
    if (apiKey) headers.Authorization = "Bearer " + apiKey;

    const res = await fetch(baseUrl + "/v1/chat/completions", {
      method: "POST",
      headers,
      body: JSON.stringify({
        model,
        messages: history,
        temperature: 0.7
      })
    });

    // Read the body as text first: error pages from proxies or the server may
    // be HTML/plain text, which res.json() would report as a bare SyntaxError.
    const raw = await res.text();

    // The conversation was cleared while the request was in flight.
    if (history !== messages) return;

    let data;
    let isJson = true;
    try {
      data = JSON.parse(raw);
    } catch (parseErr) {
      isJson = false;
    }

    if (!res.ok) {
      append(
        "error",
        isJson
          ? JSON.stringify(data, null, 2)
          : "HTTP " + res.status + " " + res.statusText + "\n" + raw
      );
      return;
    }
    if (!isJson) {
      append("error", "Non-JSON response (HTTP " + res.status + "):\n" + raw);
      return;
    }

    // Fall back to the raw payload when the reply has no message content.
    const out = data && data.choices && data.choices[0] &&
      data.choices[0].message && data.choices[0].message.content
      ? data.choices[0].message.content
      : JSON.stringify(data);

    append("assistant", out);
    history.push({ role: "assistant", content: out });
    answered = true;
  } catch (err) {
    append("error", String(err));
  } finally {
    // Drop the unanswered user turn so a retry does not send two consecutive
    // user messages.
    if (!answered) {
      const idx = history.lastIndexOf(userTurn);
      if (idx !== -1) history.splice(idx, 1);
    }
    sendBtn.disabled = false;
    sendBtn.textContent = "Send";
  }
}

// Start over: reset the history to just the system message, empty the
// transcript, and put the cursor back in the message box.
function resetConversation() {
  messages = [{ role: "system", content: "You are a helpful assistant." }];
  logEl.textContent = "";
  promptEl.focus();
}

// Ctrl+Enter (or Cmd+Enter) inside the message box sends the message.
function onPromptKeydown(event) {
  const hasModifier = event.ctrlKey || event.metaKey;
  if (hasModifier && event.key === "Enter") send();
}

clearBtn.addEventListener("click", resetConversation);
sendBtn.addEventListener("click", send);
promptEl.addEventListener("keydown", onPromptKeydown);
</script>
</body>
</html>
83 changes: 80 additions & 3 deletions scripts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,21 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Project root: the parent of the directory that holds this script.
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
# Swift package directory and its build output directory for the selected config.
SWIFT_DIR="$PROJECT_DIR/swift"
BUILD_DIR="$SWIFT_DIR/.build/arm64-apple-macosx/$CONFIG"
# mlx-swift-lm checkout inside the Swift build directory, the metallib build
# script it ships, and the path where that script's mlx.metallib is looked for.
CHECKOUT_MLXLM_DIR="$SWIFT_DIR/.build/checkouts/mlx-swift-lm"
CHECKOUT_METALLIB_SCRIPT="$CHECKOUT_MLXLM_DIR/scripts/build-metallib.sh"
CHECKOUT_METALLIB_PATH="$CHECKOUT_MLXLM_DIR/.build/arm64-apple-macosx/$CONFIG/mlx.metallib"

# Report whether a metallib contains the gated-delta (GDN) kernels.
# Arguments: $1 - path to an mlx.metallib file
# Returns:   0 if the file exists and contains "gated_delta_step_fused_",
#            1 if the path is not a regular file, otherwise grep's status.
_metallib_has_gdn_kernels() {
  local lib_path="$1"
  if [ ! -f "$lib_path" ]; then
    return 1
  fi
  # Search the binary directly with grep -a rather than `strings | grep -q`:
  # that pipeline can report a false negative under pipefail when grep exits
  # early and strings is killed by SIGPIPE.
  LC_ALL=C grep -aq "gated_delta_step_fused_" "$lib_path"
}

# Probe whether the Metal compiler can be invoked through xcrun.
# Outputs: nothing (the probe's stdout and stderr are discarded)
# Returns: the exit status of `xcrun metal -v` (0 when it runs successfully)
_metal_compiler_available() {
  local rc=0
  xcrun metal -v >/dev/null 2>&1 || rc=$?
  return "$rc"
}

echo "=== vllm-swift installer ==="
echo "Config: $CONFIG"
Expand Down Expand Up @@ -57,7 +72,15 @@ echo ""
# Build Swift bridge
echo "Building Swift bridge ($CONFIG)..."
cd "$SWIFT_DIR"
# Build once, capturing the full output in a temporary log. Piping the build
# straight into `tail` would hide a build failure behind tail's exit status,
# so the build's own status is checked and the log is summarized afterwards.
SWIFT_BUILD_LOG="$(mktemp)" || {
  echo "ERROR: could not create a temporary file for the Swift build log."
  exit 1
}
if ! swift build -c "$CONFIG" >"$SWIFT_BUILD_LOG" 2>&1; then
  # On failure, show enough of the log to see the actual compiler error.
  echo "ERROR: Swift build failed. Last 80 lines:"
  tail -80 "$SWIFT_BUILD_LOG"
  rm -f "$SWIFT_BUILD_LOG"
  exit 1
fi
# On success, keep the output short: only the final lines of the build log.
tail -3 "$SWIFT_BUILD_LOG"
rm -f "$SWIFT_BUILD_LOG"

DYLIB="$BUILD_DIR/libVLLMBridge.dylib"
if [ ! -f "$DYLIB" ]; then
Expand All @@ -70,20 +93,49 @@ echo ""
# Find and copy MLX metallib
echo "Setting up MLX metallib..."
MLX_METALLIB=""
MLX_METALLIB_FALLBACK=""

# If the mlx-swift-lm checkout ships a metallib build script, try to build a
# metallib with it and copy the result into the build directory. Every failure
# here is only a warning: the candidate search that follows still runs.
if [ -f "$CHECKOUT_METALLIB_SCRIPT" ]; then
  echo " Attempting metallib build from mlx-swift-lm checkout..."
  if ! _metal_compiler_available; then
    # Without a working Metal compiler the build script is not attempted.
    echo " WARNING: Metal compiler is not runnable in this Xcode setup."
    echo " Install/update Metal Toolchain, then rerun install:"
    echo " xcodebuild -downloadComponent MetalToolchain"
  elif ! bash "$CHECKOUT_METALLIB_SCRIPT" "$CONFIG"; then
    echo " WARNING: Checkout metallib build failed. Will fall back to existing metallib candidates."
  elif [ -f "$CHECKOUT_METALLIB_PATH" ]; then
    # The script succeeded and produced a metallib at the expected path.
    cp "$CHECKOUT_METALLIB_PATH" "$BUILD_DIR/mlx.metallib"
    echo " Built and copied checkout metallib: $CHECKOUT_METALLIB_PATH"
  fi
fi

# Check common locations for the metallib, in priority order. The first
# candidate that contains the gated_delta kernels is selected. Candidates that
# exist but lack the kernels are not selected immediately; the first of them is
# remembered and used only if no candidate has the kernels.
for candidate in \
  "$BUILD_DIR/mlx.metallib" \
  "$CHECKOUT_METALLIB_PATH" \
  "$SWIFT_DIR/.build/artifacts/mlx-swift/mlxc.artifactbundle/"*"/mlx.metallib" \
  "$(python3 -c 'import mlx; import os; print(os.path.join(os.path.dirname(mlx.__file__), "lib", "mlx.metallib"))' 2>/dev/null || echo '')" \
  "$HOME/Library/Developer/Xcode/DerivedData/"*"/Build/Products/"*"/mlx.metallib"
do
  # Empty entries (python lookup failed) and unmatched globs are skipped here.
  if [ -n "$candidate" ] && [ -f "$candidate" ]; then
    if _metallib_has_gdn_kernels "$candidate"; then
      MLX_METALLIB="$candidate"
      break
    fi
    if [ -z "$MLX_METALLIB_FALLBACK" ]; then
      MLX_METALLIB_FALLBACK="$candidate"
    fi
  fi
done

# No candidate had the kernels: settle for the first metallib that exists.
if [ -z "$MLX_METALLIB" ] && [ -n "$MLX_METALLIB_FALLBACK" ]; then
  MLX_METALLIB="$MLX_METALLIB_FALLBACK"
fi

if [ -n "$MLX_METALLIB" ]; then
if [ "$MLX_METALLIB" != "$BUILD_DIR/mlx.metallib" ]; then
cp "$MLX_METALLIB" "$BUILD_DIR/mlx.metallib"
Expand All @@ -107,6 +159,26 @@ except: pass
echo " To fix: pip install mlx && python3 -c 'import mlx.core; mlx.core.eval(mlx.core.array([1]))' "
fi
fi

# Final check on the installed metallib: confirm the gated_delta kernels are in
# it. When they are missing the install fails, unless the user explicitly
# opted in to continuing with VLLM_SWIFT_ALLOW_STOCK_METALLIB=1.
if [ -f "$BUILD_DIR/mlx.metallib" ]; then
  if ! _metallib_has_gdn_kernels "$BUILD_DIR/mlx.metallib"; then
    echo " ERROR: gated_delta kernels NOT found in $BUILD_DIR/mlx.metallib"
    echo " Models like Qwen3.6-27B-ConfigI-MLX will fail at runtime."
    # Point at the likely cause when the Metal compiler cannot be run.
    _metal_compiler_available || {
      echo " Metal compiler is unavailable. Install/update Metal Toolchain:"
      echo " xcodebuild -downloadComponent MetalToolchain"
    }
    echo " Verify with: strings $BUILD_DIR/mlx.metallib | grep gated_delta"
    case "${VLLM_SWIFT_ALLOW_STOCK_METALLIB:-0}" in
      1)
        echo " WARNING: continuing because VLLM_SWIFT_ALLOW_STOCK_METALLIB=1"
        ;;
      *)
        echo " Failing install because required GDN kernels are missing."
        echo " Override (not recommended): VLLM_SWIFT_ALLOW_STOCK_METALLIB=1 ./scripts/install.sh"
        exit 1
        ;;
    esac
  else
    echo " Verified: gated_delta kernels present in mlx.metallib"
  fi
fi
echo ""

# Find Python 3.10-3.13 (vLLM doesn't support 3.14+)
Expand Down Expand Up @@ -185,5 +257,10 @@ echo ""
# Print the quick-start instructions: one printf argument per output line,
# with empty arguments producing the blank separator lines.
printf '%s\n' \
  "Quick start:" \
  " cd $PROJECT_DIR" \
  " source activate.sh" \
  " # Baseline model" \
  " vllm serve ~/models/Qwen3-4B-4bit --max-model-len 4096" \
  "" \
  " # ConfigI model (requires gated_delta kernels)" \
  " hf download thetom-ai/Qwen3.6-27B-ConfigI-MLX" \
  " vllm serve thetom-ai/Qwen3.6-27B-ConfigI-MLX --max-model-len 4096" \
  ""