# Patch summary (text before the first "diff --git" header is ignored by git apply):
#   * mtplx/default_models.py: in _public_model_id_from_metadata, return None
#     before the config.json quantization check when
#     _public_model_id_from_name(str(path)) is None.
#   * tests/test_default_models.py: add a regression test asserting that a
#     Qwen3.6-35B-A3B folder with a Q4/Q8 quantization config keeps its
#     sanitized folder name as the public model id.
diff --git a/mtplx/default_models.py b/mtplx/default_models.py
index a0d5e02..877a973 100644
--- a/mtplx/default_models.py
+++ b/mtplx/default_models.py
@@ -209,6 +209,16 @@ def _public_model_id_from_metadata(path: Path) -> str | None:
     if inferred:
         return inferred
 
+    # The config.json quantization layout (Q4 weights with a Q8 head, flat Q8,
+    # etc.) is shared by many MLX builds, so it can only distinguish the
+    # Qwen3.6-27B MTPLX Speed/Quality split — not the model family. Skip this
+    # refinement for folders that don't already identify as a Qwen3.6-27B MTPLX
+    # artifact; otherwise a third-party model like
+    # `Qwen3.6-35B-A3B-4bit-MTPLX-Optimized-Speed` would silently be served as
+    # `mtplx-qwen36-27b-optimized-quality`.
+    if _public_model_id_from_name(str(path)) is None:
+        return None
+
     config = _read_json(path / "config.json")
     quantization = config.get("quantization") or config.get("quantization_config")
     if isinstance(quantization, dict):
diff --git a/tests/test_default_models.py b/tests/test_default_models.py
index 1b73191..33f5ca5 100644
--- a/tests/test_default_models.py
+++ b/tests/test_default_models.py
@@ -294,3 +294,28 @@ def test_public_model_id_for_ref_maps_unknown_local_name_to_sanitized_id():
         public_model_id_for_ref("/tmp/My Custom Local Model!")
         == "my-custom-local-model"
     )
+
+
+def test_public_model_id_for_ref_third_party_q4_q8_keeps_folder_name(tmp_path):
+    # Regression: a third-party Qwen3.6-35B-A3B build with the same Q4-weights/
+    # Q8-head config layout as the Qwen3.6-27B Quality artifact must not be
+    # served as `mtplx-qwen36-27b-optimized-quality`.
+    model = tmp_path / "Qwen3.6-35B-A3B-4bit-MTPLX-Optimized-Speed"
+    model.mkdir()
+    (model / "config.json").write_text(
+        json.dumps(
+            {
+                "quantization": {
+                    "bits": 4,
+                    "language_model.model.layers.0.mlp.down_proj": {"bits": 8},
+                    "language_model.model.layers.0.linear_attn.in_proj_qkv": {"bits": 8},
+                }
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    assert (
+        public_model_id_for_ref(model)
+        == "qwen3.6-35b-a3b-4bit-mtplx-optimized-speed"
+    )