From c16c8d80c35d16685298c6f46498f1a560381d41 Mon Sep 17 00:00:00 2001 From: Wallace Wadge Date: Thu, 21 May 2026 09:57:40 +0200 Subject: [PATCH] Scope quantization-config served-id inference to Qwen3.6-27B The config.json quantization layout (Q4 weights with a Q8 head, flat Q8, etc.) is shared across many MLX builds, so it cannot identify the model family on its own. _public_model_id_from_metadata was using it as the last-resort signal and unconditionally returning one of the mtplx-qwen36-27b-* ids, so a third-party artifact like Qwen3.6-35B-A3B-4bit-MTPLX-Optimized-Speed was being served as mtplx-qwen36-27b-optimized-quality on /v1/models. Gate that fallback on _public_model_id_from_name already identifying the folder as a Qwen3.6-27B MTPLX variant. Explicit metadata fields (public_model_id, served_model_id, model_id, precision_variant, artifact_role, verified_on.model) still take precedence, so the existing "runtime metadata before folder name" behavior is preserved. Unknown folders now fall through to the basename-sanitized id, e.g. qwen3.6-35b-a3b-4bit-mtplx-optimized-speed. Co-Authored-By: Claude Opus 4.7 (1M context) --- mtplx/default_models.py | 10 ++++++++++ tests/test_default_models.py | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/mtplx/default_models.py b/mtplx/default_models.py index a0d5e02..877a973 100644 --- a/mtplx/default_models.py +++ b/mtplx/default_models.py @@ -209,6 +209,16 @@ def _public_model_id_from_metadata(path: Path) -> str | None: if inferred: return inferred + # The config.json quantization layout (Q4 weights with a Q8 head, flat Q8, + # etc.) is shared by many MLX builds, so it can only distinguish the + # Qwen3.6-27B MTPLX Speed/Quality split — not the model family. Skip this + # refinement for folders that don't already identify as a Qwen3.6-27B MTPLX + # artifact; otherwise a third-party model like + # `Qwen3.6-35B-A3B-4bit-MTPLX-Optimized-Speed` would silently be served as + # `mtplx-qwen36-27b-optimized-quality`. + if _public_model_id_from_name(str(path)) is None: + return None + config = _read_json(path / "config.json") quantization = config.get("quantization") or config.get("quantization_config") if isinstance(quantization, dict): diff --git a/tests/test_default_models.py b/tests/test_default_models.py index 1b73191..33f5ca5 100644 --- a/tests/test_default_models.py +++ b/tests/test_default_models.py @@ -294,3 +294,28 @@ def test_public_model_id_for_ref_maps_unknown_local_name_to_sanitized_id(): public_model_id_for_ref("/tmp/My Custom Local Model!") == "my-custom-local-model" ) + + +def test_public_model_id_for_ref_third_party_q4_q8_keeps_folder_name(tmp_path): + # Regression: a third-party Qwen3.6-35B-A3B build with the same Q4-weights/ + # Q8-head config layout as the Qwen3.6-27B Quality artifact must not be + # served as `mtplx-qwen36-27b-optimized-quality`. + model = tmp_path / "Qwen3.6-35B-A3B-4bit-MTPLX-Optimized-Speed" + model.mkdir() + (model / "config.json").write_text( + json.dumps( + { + "quantization": { + "bits": 4, + "language_model.model.layers.0.mlp.down_proj": {"bits": 8}, + "language_model.model.layers.0.linear_attn.in_proj_qkv": {"bits": 8}, + } + } + ), + encoding="utf-8", + ) + + assert ( + public_model_id_for_ref(model) + == "qwen3.6-35b-a3b-4bit-mtplx-optimized-speed" + )