diff --git a/omlx/admin/accuracy_benchmark.py b/omlx/admin/accuracy_benchmark.py
index 74eb7fcd2..5fd0b1ef3 100644
--- a/omlx/admin/accuracy_benchmark.py
+++ b/omlx/admin/accuracy_benchmark.py
@@ -10,6 +10,7 @@
 
 import asyncio
 import logging
+import sys
 import time
 import uuid
 from dataclasses import dataclass, field
@@ -47,6 +48,10 @@ class AccuracyBenchmarkRequest(BaseModel):
     benchmarks: dict[str, int]  # name -> sample_size (0 = full dataset)
     batch_size: int = 1
     enable_thinking: bool = False
+    # Ephemeral ModelSettings overrides applied for the duration of this run
+    # only. Persisted settings are untouched. None / empty means "use whatever
+    # is on disk". Unknown keys are dropped with a warning by the manager.
+    settings_override: Optional[dict[str, Any]] = None
 
     @field_validator("batch_size")
     @classmethod
@@ -283,7 +288,21 @@ async def run_accuracy_benchmark(
     engine_pool._suppress_ttl = True
     start_time = time.time()
 
+    # Apply per-run setting overrides (from the bench-tab settings panel) for this
+    # run only; persisted model_settings.json is untouched. Engine-init flags are
+    # picked up by the model load below; sampling-class overrides flow through
+    # get_settings() into sampling_kwargs. Entered inside `try:` so an exception
+    # from __enter__ reaches the existing handlers. NOTE(review): override_ctx is
+    # bound before __enter__() runs, so a failed enter still reaches __exit__().
+    sm = getattr(engine_pool, "_settings_manager", None)
+    override_ctx = None
+
     try:
+        if sm is not None and request.settings_override:
+            override_ctx = sm.ephemeral_overrides(
+                request.model_id, request.settings_override
+            )
+            override_ctx.__enter__()
         # Phase 1: Unload all models
         loaded_ids = engine_pool.get_loaded_model_ids()
         if loaded_ids:
@@ -499,3 +518,13 @@ async def on_progress(current: int, total: int) -> None:
     finally:
         # Re-enable TTL auto-unload
         engine_pool._suppress_ttl = False
+        if override_ctx is not None:
+            try:
+                # Pass through the live exception info (if any) so the context
+                # manager sees the same triple Python's `with` would supply.
+                override_ctx.__exit__(*sys.exc_info())
+            except Exception as e:
+                logger.warning(
+                    f"Accuracy benchmark: failed to release ephemeral "
+                    f"overrides for {request.model_id}: {e}"
+                )
diff --git a/omlx/admin/benchmark.py b/omlx/admin/benchmark.py
index 7f0d620b1..1c7b2afd1 100644
--- a/omlx/admin/benchmark.py
+++ b/omlx/admin/benchmark.py
@@ -9,6 +9,7 @@
 import json
 import logging
 import re
+import sys
 import time
 import uuid
 from dataclasses import dataclass, field
@@ -43,6 +44,11 @@ class BenchmarkRequest(BaseModel):
     prompt_lengths: list[int]
     generation_length: int = 128
     batch_sizes: list[int] = []
+    # Ephemeral ModelSettings overrides applied for the duration of this run
+    # only. Persisted settings are untouched. None / empty means "use whatever
+    # is on disk". Unknown keys are dropped with a warning by the manager.
+    settings_override: Optional[dict[str, Any]] = None
+
     @field_validator("prompt_lengths")
     @classmethod
     def validate_prompt_lengths(cls, v: list[int]) -> list[int]:
@@ -644,11 +650,28 @@ async def run_benchmark(run: BenchmarkRun, engine_pool: Any) -> None:
     current_test = 0
     overall_start = time.perf_counter()
 
+    # Apply per-run setting overrides (from the bench-tab settings panel) for
+    # the duration of this run only. Persisted model_settings.json is untouched.
+    # Engine-init flags (TurboQuant/DFlash/MTP/...) are picked up because
+    # Phase 2 reloads the model. We enter the context manually inside `try:`
+    # (not via `with`) so the existing try/except body below stays unchanged;
+    # the matching `finally` at the bottom releases it. An exception raised by
+    # __enter__ reaches the existing handlers. NOTE(review): override_ctx is
+    # bound before __enter__() runs, so a failed enter still reaches __exit__().
+    sm = getattr(engine_pool, "_settings_manager", None)
+    override_ctx = None
+
     try:
+        if sm is not None and request.settings_override:
+            override_ctx = sm.ephemeral_overrides(
+                request.model_id, request.settings_override
+            )
+            override_ctx.__enter__()
         # Snapshot experimental flags at run start. Settings can change mid-run
         # (user toggling DFlash/SpecPrefill/TurboQuant), and the produced
         # numbers are tied to whatever was active when generation actually ran.
-        sm = getattr(engine_pool, "_settings_manager", None)
+        # With an override active this reflects the merged view, so
+        # override-induced experimental flags also block omlx.ai upload.
         if sm is not None:
             try:
                 s = sm.get_settings(request.model_id)
@@ -860,3 +883,15 @@ async def run_benchmark(run: BenchmarkRun, engine_pool: Any) -> None:
             await engine_pool._unload_engine(request.model_id)
         except Exception:
             pass
+
+    finally:
+        if override_ctx is not None:
+            try:
+                # Pass through the live exception info (if any) so the context
+                # manager sees the same triple Python's `with` would supply.
+                override_ctx.__exit__(*sys.exc_info())
+            except Exception as e:
+                logger.warning(
+                    f"Benchmark: failed to release ephemeral overrides for "
+                    f"{request.model_id}: {e}"
+                )
diff --git a/omlx/admin/static/js/dashboard.js b/omlx/admin/static/js/dashboard.js
index c4831fb21..fc5895054 100644
--- a/omlx/admin/static/js/dashboard.js
+++ b/omlx/admin/static/js/dashboard.js
@@ -392,6 +392,21 @@
             benchTab: 'throughput',
             benchDropdown: false,
 
+            // ---- Bench-tab inline Run-time Settings panel ----
+            // Mirrors modelSettings shape; ephemeral by default, persists only
+            // when the user clicks Save (or Save as Profile).
+            benchSettingsOpen: false,
+            benchSettings: {},          // hydrated from selected model's persisted settings
+            benchSettingsBaseline: {},  // last-hydrated snapshot for dirty detection / reset
+            benchSettingsSaving: false,
+            benchSettingsRecentlySaved: false, // ~1.5s post-save flag → checkmark in Save button
+            benchSettingsSaveProfileOpen: false,
+            benchSettingsNewProfile: { display_name: '', description: '' },
+            benchSettingsStatus: '',    // transient status line (profile created / errors)
+            benchProfiles: [],          // per-model profiles for benchModelId (loaded on hydrate)
+            benchProfileScope: 'model', // 'preset' | 'global' | 'model'
+            benchActiveProfileName: null,
+
             // Accuracy benchmark state
             accModelId: '',
             accBenchmarks: { mmlu: true, mmlu_pro: false, kmmlu: false, cmmlu: false, jmmlu: false, hellaswag: false, truthfulqa: true, arc_challenge: false, winogrande: false, gsm8k: false, mathqa: false, humaneval: true, mbpp: false, livecodebench: false, bbq: false, safetybench: false },
@@ -452,6 +467,19 @@
             accShowText: false,
             accCopied: false,
 
+            // ---- Accuracy-tab inline Run-time Settings panel ----
+            accSettingsOpen: false,
+            accSettings: {},
+            accSettingsBaseline: {},
+            accSettingsSaving: false,
+            accSettingsRecentlySaved: false,
+            accSettingsSaveProfileOpen: false,
+            accSettingsNewProfile: { display_name: '', description: '' },
+            accSettingsStatus: '',
+            accProfiles: [],
+            accProfileScope: 'model',
+            accActiveProfileName: null,
+
             async init() {
                 // Apply theme
                 this.applyTheme();
@@ -475,6 +503,15 @@
                     this.handleMainTabChange(value);
                 });
 
+                // Bench-tab inline settings panels: re-hydrate from server
+                // baseline whenever the selected model changes. Initial hydrate
+                // runs once here so the panel state is well-formed even before
+                // a model is picked.
+                this._panelHydrate('bench');
+                this._panelHydrate('acc');
+                this.$watch('benchModelId', () => this._panelHydrate('bench'));
+                this.$watch('accModelId', () => this._panelHydrate('acc'));
+
                 this.$watch('hfMlxOnly', () => {
                     this.hfRecommended = { trending: [], popular: [] };
                     this.hfRecommendedLoaded = false;
@@ -1202,43 +1239,84 @@
             },
 
             _resetPresetApplicableFields() {
-                // Reset all fields a preset can touch so switching presets does not leave
-                // stale values. Intentionally does NOT touch model_alias / model_type_override
-                // / is_pinned / is_default / turboquant_* / dflash_* / specprefill_* / index_cache_*.
-                const ms = this.modelSettings;
-                ms.temperature = null;
-                ms.top_p = null;
-                ms.top_k = null;
-                ms.min_p = null;
-                ms.repetition_penalty = null;
-                ms.presence_penalty = null;
-                ms.force_sampling = false;
-                ms.max_context_window = null;
-                ms.max_tokens = null;
-                ms.reasoning_parser = null;
-                ms.ttl_seconds = null;
-                ms.enable_thinking = null;
-                ms.enableThinkingBudget = false;
-                ms.thinking_budget_tokens = null;
-                ms.enableToolResultLimit = false;
-                ms.max_tool_result_tokens = null;
-                ms.ctKwargEntries = [];
+                this._resetStatePresetFields(this.modelSettings);
+            },
+
+            // State-parameterised form of _resetPresetApplicableFields: mutates
+            // the given `state` object in place (modal's modelSettings, bench's
+            // benchSettings, accuracy's accSettings) and returns nothing. Resets
+            // all fields a preset can touch so switching presets does not leave
+            // stale values. Intentionally does NOT touch model_alias / model_type_override /
+            // is_pinned / is_default / turboquant_* / dflash_* / specprefill_* / index_cache_*.
+            _resetStatePresetFields(state) {
+                state.temperature = null;
+                state.top_p = null;
+                state.top_k = null;
+                state.min_p = null;
+                state.repetition_penalty = null;
+                state.presence_penalty = null;
+                state.force_sampling = false; // boolean toggles reset to false, value fields to null
+                state.max_context_window = null;
+                state.max_tokens = null;
+                state.reasoning_parser = null;
+                state.ttl_seconds = null;
+                state.enable_thinking = null;
+                state.enableThinkingBudget = false;
+                state.thinking_budget_tokens = null;
+                state.enableToolResultLimit = false;
+                state.max_tool_result_tokens = null;
+                state.ctKwargEntries = []; // fresh array on every call, never shared between states
+            },
+
+            // Profile/preset merge helpers — write a settings dict into a target
+            // state object (modal or panel), mutating it in place, without touching
+            // selectedModel/activeProfileName or calling the backend. Modal- and
+            // panel-side wrappers add their own side effects (POSTs, status
+            // updates) around these. This one applies only keys listed in profileFields.
+            _mergeProfileSettingsIntoState(state, settings) {
+                const fields = this.profileFields.universal.concat(
+                    this.profileFields.model_specific,
+                );
+                for (const k of fields) {
+                    if (!(k in settings)) continue; // keys absent from the profile keep their current value
+                    if (k === 'thinking_budget_enabled') {
+                        state.enableThinkingBudget = !!settings[k]; // wire key maps onto the form toggle
+                    } else if (k === 'index_cache_freq') {
+                        state.enableIndexCache = !!settings[k]; // falsy frequency (0/null) turns the toggle off
+                        state.index_cache_freq = settings[k] || null;
+                    } else if (k === 'max_tool_result_tokens') {
+                        state.enableToolResultLimit = !!settings[k]; // falsy limit (0/null) turns the toggle off
+                        state.max_tool_result_tokens = settings[k] || null;
+                    } else if (k === 'chat_template_kwargs' || k === 'forced_ct_kwargs') {
+                        // Either key rebuilds the whole entry list from both dicts.
+                        const ctk = settings.chat_template_kwargs || {};
+                        const forced = new Set(settings.forced_ct_kwargs || []);
+                        const entries = [];
+                        for (const [key, value] of Object.entries(ctk)) {
+                            if (key === 'enable_thinking') {
+                                entries.push({type:'enable_thinking', value:String(value), force:forced.has('enable_thinking')});
+                            } else if (key === 'reasoning_effort') {
+                                entries.push({type:'reasoning_effort', value:String(value), force:forced.has('reasoning_effort')});
+                            } else {
+                                entries.push({type:'custom', key, value:String(value), force:forced.has(key)});
+                            }
+                        }
+                        state.ctKwargEntries = entries; // replaces, does not append to, the existing entries
+                    } else {
+                        state[k] = settings[k]; // every other field is copied through verbatim
+                    }
+                }
             },
 
-            applyPresetToForm(preset) {
-                // Reset first so previous preset's fields (e.g. presence_penalty) do not stick.
-                this._resetPresetApplicableFields();
-                const s = preset.settings || {};
-                const ms = this.modelSettings;
-                for (const k of Object.keys(s)) {
+            _mergePresetSettingsIntoState(state, settings) {
+                for (const k of Object.keys(settings)) {
                     if (k === 'thinking_budget_enabled') {
-                        ms.enableThinkingBudget = !!s[k];
+                        state.enableThinkingBudget = !!settings[k];
                     } else if (k === 'max_tool_result_tokens') {
-                        ms.enableToolResultLimit = s[k] != null;
-                        ms.max_tool_result_tokens = s[k] ?? null;
+                        state.enableToolResultLimit = settings[k] != null;
+                        state.max_tool_result_tokens = settings[k] ?? null;
                     } else if (k === 'chat_template_kwargs' || k === 'forced_ct_kwargs') {
-                        const ctk = s.chat_template_kwargs || {};
-                        const forced = new Set(s.forced_ct_kwargs || []);
+                        const ctk = settings.chat_template_kwargs || {};
+                        const forced = new Set(settings.forced_ct_kwargs || []);
                         const entries = [];
                         for (const [key, value] of Object.entries(ctk)) {
                             if (key === 'enable_thinking') {
@@ -1249,11 +1327,17 @@
                                 entries.push({type:'custom', key, value:String(value), force:forced.has(key)});
                             }
                         }
-                        ms.ctKwargEntries = entries;
+                        state.ctKwargEntries = entries;
                     } else {
-                        ms[k] = s[k];
+                        state[k] = settings[k];
                     }
                 }
+            },
+
+            applyPresetToForm(preset) {
+                // Reset first so the previous preset's fields (e.g. presence_penalty) do not stick.
+                this._resetStatePresetFields(this.modelSettings);
+                this._mergePresetSettingsIntoState(this.modelSettings, preset.settings || {}); // a preset without settings merges nothing
                 this.activeProfileName = null;
                 this.profilesDrift = false;
             },
@@ -1304,37 +1388,7 @@
             },
             async applyProfileToForm(profile) {
                 // Merge all profile fields into the form (no server call — user clicks Save to persist).
-                const s = profile.settings || {};
-                const ms = this.modelSettings;
-                for (const k of this.profileFields.universal.concat(this.profileFields.model_specific)) {
-                    if (!(k in s)) continue;
-                    if (k === 'thinking_budget_enabled') {
-                        ms.enableThinkingBudget = !!s[k];
-                    } else if (k === 'index_cache_freq') {
-                        ms.enableIndexCache = !!s[k];
-                        ms.index_cache_freq = s[k] || null;
-                    } else if (k === 'max_tool_result_tokens') {
-                        ms.enableToolResultLimit = !!s[k];
-                        ms.max_tool_result_tokens = s[k] || null;
-                    } else if (k === 'chat_template_kwargs' || k === 'forced_ct_kwargs') {
-                        // Rebuild ctKwargEntries
-                        const ctk = s.chat_template_kwargs || {};
-                        const forced = new Set(s.forced_ct_kwargs || []);
-                        const entries = [];
-                        for (const [key, value] of Object.entries(ctk)) {
-                            if (key === 'enable_thinking') {
-                                entries.push({type:'enable_thinking', value:String(value), force:forced.has('enable_thinking')});
-                            } else if (key === 'reasoning_effort') {
-                                entries.push({type:'reasoning_effort', value:String(value), force:forced.has('reasoning_effort')});
-                            } else {
-                                entries.push({type:'custom', key, value:String(value), force:forced.has(key)});
-                            }
-                        }
-                        ms.ctKwargEntries = entries;
-                    } else {
-                        ms[k] = s[k];
-                    }
-                }
+                this._mergeProfileSettingsIntoState(this.modelSettings, profile.settings || {});
                 // Persist active_profile_name to backend before updating UI state
                 const seq = ++this._applySeq;
                 try {
@@ -2481,6 +2535,12 @@
                             prompt_lengths: promptLengths,
                             generation_length: 128,
                             batch_sizes: batchSizes,
+                            // Ship inline panel edits as a per-run override.
+                            // null when nothing's been touched, so the run uses
+                            // whatever's persisted in model_settings.json.
+                            settings_override: this.benchSettingsDirty()
+                                ? this._settingsStateToPayload(this.benchSettings)
+                                : null,
                         }),
                     });
 
@@ -2787,6 +2847,12 @@
                             ),
                             batch_size: this.accBatchSize,
                             enable_thinking: this.accEnableThinking,
+                            // Ship inline panel edits as a per-run override.
+                            // null when nothing's been touched, so the run uses
+                            // whatever's persisted in model_settings.json.
+                            settings_override: this.accSettingsDirty()
+                                ? this._settingsStateToPayload(this.accSettings)
+                                : null,
                         }),
                     });
                     if (!resp.ok) {
@@ -4779,5 +4845,469 @@
                     this.msModelDetailLoading = false;
                 }
             },
+
+            // =================================================================
+            // Bench-tab inline Run-time Settings panel.
+            //
+            // Mirrors modelSettings shape and serialization, but binds to
+            // benchSettings / accSettings so edits don't pollute the modal's
+            // form. Edits are ephemeral by default — they ship as a
+            // settings_override on the bench request and revert on the next
+            // model selection. Save / Save-as-Profile reuse the existing
+            // /api/models/{id}/settings and /api/models/{id}/profiles
+            // endpoints used by the modal.
+            // =================================================================
+
+            // Empty/default panel state, returned as a fresh object on every call.
+            // Keep aligned with openModelSettings's assignment to this.modelSettings.
+            _emptySettingsState() {
+                return {
+                    model_alias: '',
+                    model_type_override: '',
+                    max_context_window: null,
+                    max_tokens: null,
+                    temperature: null,
+                    top_p: null,
+                    top_k: null,
+                    repetition_penalty: null,
+                    min_p: null,
+                    presence_penalty: null,
+                    force_sampling: false,
+                    enable_thinking: null,
+                    thinking_default: null, // _modelToSettingsState fills this from the model entry, not from settings
+                    enableThinkingBudget: false,
+                    thinking_budget_tokens: null,
+                    enableToolResultLimit: false,
+                    max_tool_result_tokens: null,
+                    reasoning_parser: '',
+                    ttl_seconds: null,
+                    enableIndexCache: false,
+                    index_cache_freq: null,
+                    turboquant_kv_enabled: false,
+                    turboquant_kv_bits: 4,
+                    specprefill_enabled: false,
+                    specprefill_draft_model: '',
+                    specprefill_keep_pct: '0.2', // held as a string in the form; _settingsStateToPayload parses it with parseFloat
+                    specprefill_threshold: null,
+                    dflash_enabled: false,
+                    dflash_draft_model: '',
+                    dflash_draft_quant_bits: '', // string in the form; '' serialises to null
+                    dflash_max_ctx: null,
+                    dflash_in_memory_cache: true,
+                    dflash_in_memory_cache_max_entries: 4,
+                    dflash_in_memory_cache_max_gib: 8, // GiB in the form; serialised as dflash_in_memory_cache_max_bytes
+                    dflash_ssd_cache: false,
+                    dflash_compatible: true, // dflash_*/mtp_* compatibility fields are read from the model entry, not settings
+                    dflash_compatibility_reason: '',
+                    dflash_ssd_cache_available: false,
+                    mtp_enabled: false,
+                    mtp_compatible: false,
+                    mtp_compatibility_reason: '',
+                    vlm_mtp_enabled: false,
+                    vlm_mtp_draft_model: '',
+                    vlm_mtp_draft_block_size: null,
+                    ctKwargEntries: [],
+                    trust_remote_code: false,
+                };
+            },
+
+            // Build a panel settings state from a model entry (`model.settings` plus
+            // model-level capability fields); a falsy `model` yields the empty state.
+            // Mirrors openModelSettings's assignment; kept separate from the modal's flow.
+            _modelToSettingsState(model) {
+                if (!model) return this._emptySettingsState();
+                const settings = model.settings || {};
+                const ctk = settings.chat_template_kwargs || {};
+                const forcedKeys = new Set(settings.forced_ct_kwargs || []);
+                const ctKwargEntries = []; // one editable row per chat_template_kwargs key, values stringified
+                for (const [key, value] of Object.entries(ctk)) {
+                    if (key === 'enable_thinking') {
+                        ctKwargEntries.push({type: 'enable_thinking', value: String(value), force: forcedKeys.has('enable_thinking')});
+                    } else if (key === 'reasoning_effort') {
+                        ctKwargEntries.push({type: 'reasoning_effort', value: String(value), force: forcedKeys.has('reasoning_effort')});
+                    } else {
+                        ctKwargEntries.push({type: 'custom', key, value: String(value), force: forcedKeys.has(key)});
+                    }
+                }
+                const isOcr = OCR_CONFIG_MODEL_TYPES.has(model.config_model_type || ''); // OCR model types get temperature pinned to 0.0 below
+                return {
+                    model_alias: settings.model_alias || '',
+                    model_type_override: settings.model_type_override || '',
+                    max_context_window: settings.max_context_window || null, // `||`: 0 is treated as unset
+                    max_tokens: settings.max_tokens || null,
+                    temperature: isOcr ? 0.0 : (settings.temperature ?? null), // `??`: an explicit 0 is preserved
+                    top_p: settings.top_p ?? null,
+                    top_k: settings.top_k ?? null,
+                    repetition_penalty: settings.repetition_penalty ?? null,
+                    min_p: settings.min_p ?? null,
+                    presence_penalty: settings.presence_penalty ?? null,
+                    force_sampling: settings.force_sampling || false,
+                    enable_thinking: settings.enable_thinking ?? null,
+                    thinking_default: model.thinking_default ?? null,
+                    enableThinkingBudget: !!(settings.thinking_budget_tokens), // toggle derived from a non-zero token budget
+                    thinking_budget_tokens: settings.thinking_budget_tokens || null,
+                    enableToolResultLimit: !!(settings.max_tool_result_tokens), // toggle derived from a non-zero limit
+                    max_tool_result_tokens: settings.max_tool_result_tokens || null,
+                    reasoning_parser: settings.reasoning_parser || '',
+                    ttl_seconds: settings.ttl_seconds ?? null,
+                    enableIndexCache: !!(settings.index_cache_freq), // toggle derived from a non-zero frequency
+                    index_cache_freq: settings.index_cache_freq || null,
+                    turboquant_kv_enabled: settings.turboquant_kv_enabled || false,
+                    turboquant_kv_bits: settings.turboquant_kv_bits || 4,
+                    specprefill_enabled: settings.specprefill_enabled || false,
+                    specprefill_draft_model: settings.specprefill_draft_model || '',
+                    specprefill_keep_pct: settings.specprefill_keep_pct ? String(settings.specprefill_keep_pct) : '0.2',
+                    specprefill_threshold: settings.specprefill_threshold || null,
+                    dflash_enabled: settings.dflash_enabled || false,
+                    dflash_draft_model: settings.dflash_draft_model || '',
+                    dflash_draft_quant_bits: settings.dflash_draft_quant_bits ? String(settings.dflash_draft_quant_bits) : '',
+                    dflash_max_ctx: settings.dflash_max_ctx ?? null,
+                    dflash_in_memory_cache: settings.dflash_in_memory_cache !== false, // true unless explicitly false
+                    dflash_in_memory_cache_max_entries: settings.dflash_in_memory_cache_max_entries || 4,
+                    dflash_in_memory_cache_max_gib: settings.dflash_in_memory_cache_max_bytes
+                        ? Math.round(settings.dflash_in_memory_cache_max_bytes / (1024 ** 3))
+                        : 8, // bytes rounded to whole GiB; 8 when unset
+                    dflash_ssd_cache: settings.dflash_ssd_cache || false,
+                    dflash_compatible: model.dflash_compatible !== false, // compatible unless the model entry says false
+                    dflash_compatibility_reason: model.dflash_compatibility_reason || '',
+                    dflash_ssd_cache_available: !!model.dflash_ssd_cache_available,
+                    mtp_enabled: settings.mtp_enabled || false,
+                    mtp_compatible: model.mtp_compatible === true, // only an explicit true counts
+                    mtp_compatibility_reason: model.mtp_compatibility_reason || '',
+                    vlm_mtp_enabled: settings.vlm_mtp_enabled || false,
+                    vlm_mtp_draft_model: settings.vlm_mtp_draft_model || '',
+                    vlm_mtp_draft_block_size: settings.vlm_mtp_draft_block_size ?? null,
+                    ctKwargEntries,
+                    trust_remote_code: settings.trust_remote_code || false,
+                };
+            },
+
+            // Convert a settings state object `s` into the wire payload used by
+            // both PUT /api/models/{id}/settings (full save) and the bench request
+            // body's settings_override. Returns a new object; `s` is only read.
+            // Mirrors the payload built in saveModelSettings. Kept separate so
+            // changes to the modal don't quietly affect bench overrides.
+            _settingsStateToPayload(s) {
+                const chatTemplateKwargs = {};
+                const forcedCtKwargs = [];
+                for (const entry of (s.ctKwargEntries || [])) {
+                    if (entry.type === 'enable_thinking') {
+                        chatTemplateKwargs.enable_thinking = entry.value === 'true'; // row value is a string; sent as a boolean
+                        if (entry.force) forcedCtKwargs.push('enable_thinking');
+                    } else if (entry.type === 'reasoning_effort') {
+                        chatTemplateKwargs.reasoning_effort = entry.value;
+                        if (entry.force) forcedCtKwargs.push('reasoning_effort');
+                    } else if (entry.type === 'custom' && entry.key && entry.key.trim()) { // custom rows with a blank key are skipped
+                        let val = entry.value; // coerced below: 'true'/'false' to boolean, numeric strings to number
+                        if (val === 'true') val = true;
+                        else if (val === 'false') val = false;
+                        else if (typeof val === 'string' && val.trim() !== '' && !isNaN(Number(val))) val = Number(val);
+                        const key = entry.key.trim();
+                        chatTemplateKwargs[key] = val;
+                        if (entry.force) forcedCtKwargs.push(key);
+                    }
+                }
+                return {
+                    model_alias: s.model_alias?.trim() || null,
+                    model_type_override: s.model_type_override || null,
+                    max_context_window: s.max_context_window || null,
+                    max_tokens: s.max_tokens || null,
+                    temperature: Number.isFinite(s.temperature) ? s.temperature : null, // non-finite (null, NaN, strings) becomes null; 0 is kept
+                    top_p: Number.isFinite(s.top_p) ? s.top_p : null,
+                    top_k: Number.isFinite(s.top_k) ? s.top_k : null,
+                    repetition_penalty: Number.isFinite(s.repetition_penalty) ? s.repetition_penalty : null,
+                    min_p: Number.isFinite(s.min_p) ? s.min_p : null,
+                    presence_penalty: Number.isFinite(s.presence_penalty) ? s.presence_penalty : null,
+                    force_sampling: s.force_sampling,
+                    reasoning_parser: s.reasoning_parser || null,
+                    // Preserve explicit 0 (means "no TTL"); only fall to null
+                    // when the value is missing/non-finite.
+                    ttl_seconds: Number.isFinite(s.ttl_seconds) ? s.ttl_seconds : null,
+                    index_cache_freq: s.enableIndexCache ? (s.index_cache_freq || 4) : 0, // 0 when the toggle is off; 4 when on but unset
+                    enable_thinking: s.enable_thinking,
+                    thinking_budget_enabled: s.enableThinkingBudget,
+                    thinking_budget_tokens: s.enableThinkingBudget ? (s.thinking_budget_tokens || null) : 0, // 0 when the toggle is off
+                    max_tool_result_tokens: s.enableToolResultLimit ? (s.max_tool_result_tokens || null) : 0, // 0 when the toggle is off
+                    chat_template_kwargs: Object.keys(chatTemplateKwargs).length > 0 ? chatTemplateKwargs : null,
+                    forced_ct_kwargs: forcedCtKwargs.length > 0 ? forcedCtKwargs : null,
+                    turboquant_kv_enabled: s.turboquant_kv_enabled,
+                    turboquant_kv_bits: s.turboquant_kv_enabled ? (parseFloat(s.turboquant_kv_bits) || 4) : 4, // always a number; falls back to 4
+                    specprefill_enabled: s.specprefill_enabled,
+                    specprefill_draft_model: s.specprefill_draft_model || null,
+                    specprefill_keep_pct: s.specprefill_enabled ? (parseFloat(s.specprefill_keep_pct) || 0.2) : null,
+                    specprefill_threshold: s.specprefill_enabled ? (s.specprefill_threshold || null) : null,
+                    dflash_enabled: s.dflash_enabled,
+                    dflash_draft_model: s.dflash_draft_model || null,
+                    dflash_draft_quant_bits: s.dflash_enabled && s.dflash_draft_quant_bits ? parseInt(s.dflash_draft_quant_bits) : null,
+                    dflash_max_ctx: s.dflash_enabled && s.dflash_max_ctx ? parseInt(s.dflash_max_ctx) : null,
+                    dflash_in_memory_cache: s.dflash_enabled ? !!s.dflash_in_memory_cache : true, // dflash_* cache fields fall back to fixed values when DFlash is off
+                    dflash_in_memory_cache_max_entries: s.dflash_enabled
+                        ? (parseInt(s.dflash_in_memory_cache_max_entries) || 4)
+                        : 4,
+                    dflash_in_memory_cache_max_bytes: s.dflash_enabled
+                        ? Math.max(1, parseInt(s.dflash_in_memory_cache_max_gib) || 8) * (1024 ** 3) // GiB to bytes, at least 1 GiB
+                        : 8 * (1024 ** 3),
+                    dflash_ssd_cache: s.dflash_enabled
+                        && !!s.dflash_in_memory_cache
+                        && !!s.dflash_ssd_cache_available
+                        && !!s.dflash_ssd_cache, // true only when all four conditions hold
+                    mtp_enabled: !!s.mtp_enabled,
+                    vlm_mtp_enabled: !!s.vlm_mtp_enabled,
+                    vlm_mtp_draft_model: s.vlm_mtp_enabled
+                        ? (s.vlm_mtp_draft_model || null)
+                        : null,
+                    vlm_mtp_draft_block_size: s.vlm_mtp_enabled && s.vlm_mtp_draft_block_size
+                        ? parseInt(s.vlm_mtp_draft_block_size)
+                        : null,
+                    trust_remote_code: !!s.trust_remote_code,
+                };
+            },
+
+            // Engine-init-class fields. Toggling these requires the engine
+            // to be reloaded. Both bench tabs reload the model on every run
+            // so it costs nothing here, but we surface a hint in the panel.
+            _ENGINE_INIT_KEYS: [
+                'turboquant_kv_enabled', 'turboquant_kv_bits',
+                'dflash_enabled', 'dflash_draft_model', 'dflash_draft_quant_bits',
+                'dflash_max_ctx', 'dflash_in_memory_cache',
+                'dflash_in_memory_cache_max_entries',
+                'dflash_in_memory_cache_max_gib', 'dflash_ssd_cache',
+                'specprefill_enabled', 'specprefill_draft_model',
+                'specprefill_keep_pct', 'specprefill_threshold',
+                'mtp_enabled',
+                'vlm_mtp_enabled', 'vlm_mtp_draft_model', 'vlm_mtp_draft_block_size',
+                'enableIndexCache', 'index_cache_freq',
+                'reasoning_parser', 'model_type_override', 'trust_remote_code',
+            ],
+
+            // Generic panel ops keyed by prefix ('bench' | 'acc'). Each tab
+            // exposes thin wrappers below for templates to call by name.
+            _panelHydrate(prefix) {
+                const modelIdField = prefix === 'bench' ? 'benchModelId' : 'accModelId';
+                const modelId = this[modelIdField];
+                const model = (this.models || []).find(m => m.id === modelId) || null;
+                const state = this._modelToSettingsState(model);
+                this[prefix + 'Settings'] = state;
+                this[prefix + 'SettingsBaseline'] = JSON.parse(JSON.stringify(state));
+                this[prefix + 'SettingsStatus'] = '';
+                this[prefix + 'SettingsSaveProfileOpen'] = false;
+                // Echo modal's "active profile" tracking so the matching pill
+                // can render highlighted when the bench model has one set.
+                this[prefix + 'ActiveProfileName'] = (model && model.settings &&
+                    model.settings.active_profile_name) || null;
+
+                // Pull the model's profile list so the pills row can render.
+                // Templates + presets are shared globals already loaded at init().
+                this[prefix + 'Profiles'] = [];
+                if (modelId) {
+                    fetch(`/admin/api/models/${encodeURIComponent(modelId)}/profiles`)
+                        .then(async r => {
+                            // Honor the session-expiry redirect contract used
+                            // elsewhere in the dashboard for /admin/api fetches.
+                            if (r.status === 401) {
+                                window.location.href = '/admin';
+                                return null;
+                            }
+                            if (!r.ok) {
+                                this[prefix + 'SettingsStatus'] =
+                                    `Failed to load profiles (HTTP ${r.status}).`;
+                                return null;
+                            }
+                            return r.json();
+                        })
+                        .then(data => {
+                            if (data && this[modelIdField] === modelId) {
+                                this[prefix + 'Profiles'] = data.profiles || [];
+                            }
+                        })
+                        .catch(e => {
+                            console.error(`Failed to load ${prefix} profiles:`, e);
+                            this[prefix + 'SettingsStatus'] =
+                                `Failed to load profiles: ${e}`;
+                        });
+                }
+            },
+            _panelDirty(prefix) {
+                const cur = this[prefix + 'Settings'];
+                const base = this[prefix + 'SettingsBaseline'];
+                if (!cur || !base) return false;
+                return JSON.stringify(cur) !== JSON.stringify(base);
+            },
+            _panelRequiresReload(prefix) {
+                const cur = this[prefix + 'Settings'];
+                const base = this[prefix + 'SettingsBaseline'];
+                if (!cur || !base) return false;
+                for (const k of this._ENGINE_INIT_KEYS) {
+                    if (JSON.stringify(cur[k]) !== JSON.stringify(base[k])) return true;
+                }
+                return false;
+            },
+            _panelReset(prefix) {
+                this[prefix + 'Settings'] = JSON.parse(JSON.stringify(this[prefix + 'SettingsBaseline']));
+                this[prefix + 'SettingsStatus'] = '';
+            },
+            async _panelSave(prefix) {
+                const modelIdField = prefix === 'bench' ? 'benchModelId' : 'accModelId';
+                const modelId = this[modelIdField];
+                if (!modelId) return;
+                this[prefix + 'SettingsSaving'] = true;
+                this[prefix + 'SettingsStatus'] = '';
+                try {
+                    const payload = this._settingsStateToPayload(this[prefix + 'Settings']);
+                    const response = await fetch(
+                        `/admin/api/models/${encodeURIComponent(modelId)}/settings`,
+                        { method: 'PUT', headers: { 'Content-Type': 'application/json' },
+                          body: JSON.stringify(payload) }
+                    );
+                    if (response.ok) {
+                        await this.loadModels();
+                        // Re-hydrate baseline from the refreshed model entry so
+                        // dirty flips back to false after a successful save.
+                        this._panelHydrate(prefix);
+                        this[prefix + 'SettingsRecentlySaved'] = true;
+                        setTimeout(() => {
+                            this[prefix + 'SettingsRecentlySaved'] = false;
+                        }, 1500);
+                    } else if (response.status === 401) {
+                        window.location.href = '/admin';
+                    } else {
+                        const data = await response.json().catch(() => ({}));
+                        this[prefix + 'SettingsStatus'] = data.detail || 'Save failed.';
+                    }
+                } catch (err) {
+                    console.error(`Failed to save ${prefix} settings:`, err);
+                    this[prefix + 'SettingsStatus'] = `Save failed: ${err.message}`;
+                } finally {
+                    this[prefix + 'SettingsSaving'] = false;
+                }
+            },
+            _panelSaveAsProfile(prefix) {
+                const modelIdField = prefix === 'bench' ? 'benchModelId' : 'accModelId';
+                if (!this[modelIdField]) return;
+                this[prefix + 'SettingsNewProfile'] = { display_name: '', description: '' };
+                this[prefix + 'SettingsSaveProfileOpen'] = true;
+                this[prefix + 'SettingsStatus'] = '';
+            },
+            async _panelSaveAsProfileConfirm(prefix) {
+                const modelIdField = prefix === 'bench' ? 'benchModelId' : 'accModelId';
+                const modelId = this[modelIdField];
+                const np = this[prefix + 'SettingsNewProfile'];
+                if (!modelId || !np?.display_name?.trim()) return;
+                const autoId = 'p-' + Date.now().toString(36) + '-' +
+                               Math.random().toString(36).slice(2, 6);
+                // formValuesForProfile() reads this.modelSettings; aim it at
+                // the panel state for one call, then restore. Avoids
+                // re-implementing the chat_template_kwargs flattening logic.
+                const savedMs = this.modelSettings;
+                this.modelSettings = this[prefix + 'Settings'];
+                let profileSettings;
+                try {
+                    profileSettings = this.formValuesForProfile();
+                } finally {
+                    this.modelSettings = savedMs;
+                }
+                const body = {
+                    name: autoId,
+                    display_name: np.display_name.trim(),
+                    description: np.description?.trim() || null,
+                    settings: profileSettings,
+                    also_save_as_template: false,
+                };
+                try {
+                    const r = await fetch(
+                        `/admin/api/models/${encodeURIComponent(modelId)}/profiles`,
+                        { method: 'POST', headers: { 'Content-Type': 'application/json' },
+                          body: JSON.stringify(body) }
+                    );
+                    if (r.ok) {
+                        this[prefix + 'SettingsSaveProfileOpen'] = false;
+                        this[prefix + 'SettingsStatus'] = `Profile "${np.display_name.trim()}" created.`;
+                        // Refresh the panel's profile list so the new pill
+                        // shows up immediately. Don't re-hydrate everything;
+                        // just refetch the profiles for this model.
+                        try {
+                            const pr = await fetch(`/admin/api/models/${encodeURIComponent(modelId)}/profiles`);
+                            if (pr.ok) {
+                                const pd = await pr.json();
+                                this[prefix + 'Profiles'] = pd.profiles || [];
+                            }
+                        } catch (_) { /* non-fatal */ }
+                    } else if (r.status === 401) {
+                        window.location.href = '/admin';
+                    } else {
+                        const data = await r.json().catch(() => ({}));
+                        this[prefix + 'SettingsStatus'] = data.detail || 'Profile save failed.';
+                    }
+                } catch (e) {
+                    this[prefix + 'SettingsStatus'] = String(e);
+                }
+            },
+
+            // Apply a profile / template / preset to the panel state.
+            // These are intentionally ephemeral by default — they mutate
+            // only the panel's state object via pure merge helpers, and
+            // never call applyProfileToForm/applyTemplateToForm (which
+            // POST to /admin/api/models/{selectedModel.id}/...) — those
+            // would target the modal's selectedModel and persist server-
+            // side, breaking the ephemeral-override contract and possibly
+            // hitting the wrong model entirely.
+            _panelApplyProfile(prefix, profile) {
+                this._mergeProfileSettingsIntoState(
+                    this[prefix + 'Settings'],
+                    profile.settings || {},
+                );
+                this[prefix + 'ActiveProfileName'] = profile.name;
+                this[prefix + 'SettingsStatus'] = `Applied profile "${profile.display_name || profile.name}".`;
+            },
+            _panelApplyTemplate(prefix, template) {
+                // Templates carry the same shape as profiles. We merge them
+                // directly into panel state — unlike applyTemplateToForm, we
+                // do NOT create a server-side profile from the template
+                // (that's a write-action the user hasn't asked for from
+                // the bench panels).
+                this._mergeProfileSettingsIntoState(
+                    this[prefix + 'Settings'],
+                    template.settings || {},
+                );
+                // Templates aren't profiles — clear the active-profile highlight.
+                this[prefix + 'ActiveProfileName'] = null;
+                this[prefix + 'SettingsStatus'] = `Applied template "${template.display_name || template.name}".`;
+            },
+            _panelApplyPreset(prefix, preset) {
+                const state = this[prefix + 'Settings'];
+                // Reset first so previous preset's fields don't bleed through.
+                this._resetStatePresetFields(state);
+                this._mergePresetSettingsIntoState(state, preset.settings || {});
+                this[prefix + 'ActiveProfileName'] = null;
+                this[prefix + 'SettingsStatus'] = `Applied preset "${preset.display_name || preset.name}".`;
+            },
+
+            // ---- Bench (throughput) tab — template-facing API ----
+            get benchSelectedModel() {
+                return (this.models || []).find(m => m.id === this.benchModelId) || null;
+            },
+            benchSettingsDirty() { return this._panelDirty('bench'); },
+            benchSettingsRequiresReload() { return this._panelRequiresReload('bench'); },
+            benchSettingsReset() { this._panelReset('bench'); },
+            benchSettingsSave() { return this._panelSave('bench'); },
+            benchSettingsSaveAsProfile() { this._panelSaveAsProfile('bench'); },
+            benchSettingsSaveAsProfileConfirm() { return this._panelSaveAsProfileConfirm('bench'); },
+            benchApplyProfile(p) { return this._panelApplyProfile('bench', p); },
+            benchApplyTemplate(t) { return this._panelApplyTemplate('bench', t); },
+            benchApplyPreset(p) { this._panelApplyPreset('bench', p); },
+
+            // ---- Accuracy tab — template-facing API ----
+            get accSelectedModel() {
+                return (this.models || []).find(m => m.id === this.accModelId) || null;
+            },
+            accSettingsDirty() { return this._panelDirty('acc'); },
+            accSettingsRequiresReload() { return this._panelRequiresReload('acc'); },
+            accSettingsReset() { this._panelReset('acc'); },
+            accSettingsSave() { return this._panelSave('acc'); },
+            accSettingsSaveAsProfile() { this._panelSaveAsProfile('acc'); },
+            accSettingsSaveAsProfileConfirm() { return this._panelSaveAsProfileConfirm('acc'); },
+            accApplyProfile(p) { return this._panelApplyProfile('acc', p); },
+            accApplyTemplate(t) { return this._panelApplyTemplate('acc', t); },
+            accApplyPreset(p) { this._panelApplyPreset('acc', p); },
         }
     }
diff --git a/omlx/admin/templates/base.html b/omlx/admin/templates/base.html
index d8c3a2c9a..ac4493505 100644
--- a/omlx/admin/templates/base.html
+++ b/omlx/admin/templates/base.html
@@ -129,33 +129,80 @@
             }
 
             function replaceIcon(el) {
-                var name = el.getAttribute('data-lucide');
-                if (!name) return;
-                var def = lucide.icons[toPascal(name)];
-                if (!def) return;
-                var svg = lucide.createElement(def);
-                Array.from(el.attributes).forEach(function(a) {
-                    if (a.name !== 'data-lucide') svg.setAttribute(a.name, a.value);
-                });
-                svg.classList.add('lucide', 'lucide-' + name);
-                if (el.parentNode) el.parentNode.replaceChild(svg, el);
+                // Swap one <i data-lucide="name"> placeholder for the
+                // matching lucide <svg>. Never throws.
+                //
+                // Per-icon try/catch: a single failure must NOT abort the rest
+                // of the pass (forEach does not isolate iteration exceptions,
+                // so one throw here previously left every later <i data-lucide>
+                // unprocessed — a second-tab regression).
+                try {
+                    var name = el.getAttribute('data-lucide');
+                    // No name set, or no icon registered under that name:
+                    // leave the placeholder element untouched.
+                    if (!name) return;
+                    var def = lucide.icons[toPascal(name)];
+                    if (!def) return;
+                    var svg = lucide.createElement(def);
+                    // Carry every attribute over to the <svg> except the
+                    // data-lucide marker itself.
+                    Array.from(el.attributes).forEach(function(a) {
+                        if (a.name !== 'data-lucide') svg.setAttribute(a.name, a.value);
+                    });
+                    svg.classList.add('lucide', 'lucide-' + name);
+                    // Only swap while the placeholder still has a parent.
+                    if (el.parentNode) el.parentNode.replaceChild(svg, el);
+                } catch (e) {
+                    // Report the failure when a console is available, then
+                    // return normally so the caller moves on to the next icon.
+                    if (window.console && console.warn) {
+                        console.warn('lucide: failed to render icon', el, e);
+                    }
+                }
             }
 
             function processAll() {
                 document.querySelectorAll('i[data-lucide]').forEach(replaceIcon);
             }
 
-            // Initial pass
+            // Initial pass.
             if (document.readyState === 'loading') {
                 document.addEventListener('DOMContentLoaded', processAll);
             } else {
                 processAll();
             }
 
-            // Poll for dynamically added icons (Alpine x-for, x-if, modals).
-            setInterval(function() {
-                if (document.querySelector('i[data-lucide]')) processAll();
-            }, 300);
+            // MutationObserver replaces the previous 300ms poll. It reacts
+            // immediately to:
+            //   - newly inserted <i data-lucide> nodes (Alpine x-for / x-if,
+            //     modal open, dynamically rendered partials)
+            //   - attribute mutations on existing <i> elements (Alpine
+            //     :data-lucide bindings resolving once the component mounts).
+            // No idle CPU cost, and zero latency between DOM availability and
+            // icon replacement.
+            function startObserving() {
+                var observer = new MutationObserver(function(mutations) {
+                    for (var i = 0; i < mutations.length; i++) {
+                        var m = mutations[i];
+                        if (m.type === 'childList') {
+                            m.addedNodes.forEach(function(node) {
+                                if (node.nodeType !== 1) return;
+                                if (node.matches && node.matches('i[data-lucide]')) {
+                                    replaceIcon(node);
+                                }
+                                if (node.querySelectorAll) {
+                                    node.querySelectorAll('i[data-lucide]').forEach(replaceIcon);
+                                }
+                            });
+                        } else if (m.type === 'attributes' &&
+                                   m.attributeName === 'data-lucide' &&
+                                   m.target.tagName === 'I') {
+                            replaceIcon(m.target);
+                        }
+                    }
+                });
+                observer.observe(document.body, {
+                    childList: true,
+                    subtree: true,
+                    attributes: true,
+                    attributeFilter: ['data-lucide']
+                });
+            }
+            if (document.readyState === 'loading') {
+                document.addEventListener('DOMContentLoaded', startObserving);
+            } else {
+                startObserving();
+            }
         })();
     </script>
 
diff --git a/omlx/admin/templates/dashboard.html b/omlx/admin/templates/dashboard.html
index 23be93ab4..8847e2a62 100644
--- a/omlx/admin/templates/dashboard.html
+++ b/omlx/admin/templates/dashboard.html
@@ -39,6 +39,21 @@
     <!-- Model Settings Modal -->
     {% include "dashboard/_modal_model_settings.html" %}
 
+    <!-- Run-time Settings Modals (one per bench tab) -->
+    {# One modal is rendered per tab. The first argument is the prefix
+       ('bench' / 'acc') from which the macro derives its Alpine bindings;
+       run_fn and run_disabled_expr are JS expressions for the modal's
+       Run benchmark button, and placeholder is the suggested profile name
+       shown in the inline create form. #}
+    {% from "dashboard/_modal_bench_settings.html" import bench_settings_modal %}
+    {{ bench_settings_modal(
+        'bench',
+        run_fn='startBenchmark()',
+        run_disabled_expr='!benchModelId || Object.values(benchPromptLengths).every(v => !v) || benchRunning',
+        placeholder='e.g. low-temp-deterministic'
+    ) }}
+    {{ bench_settings_modal(
+        'acc',
+        run_fn='addToAccQueue()',
+        run_disabled_expr='!accModelId || !Object.values(accBenchmarks).some(v => v) || accRunning',
+        placeholder='e.g. high-recall-mmlu'
+    ) }}
+
     <!-- HF Mirror Settings Modal -->
     <div x-show="showHfMirrorModal" x-cloak
          class="fixed inset-0 z-50 overflow-y-auto"
diff --git a/omlx/admin/templates/dashboard/_bench.html b/omlx/admin/templates/dashboard/_bench.html
index e2a31432a..cbe6a0b3a 100644
--- a/omlx/admin/templates/dashboard/_bench.html
+++ b/omlx/admin/templates/dashboard/_bench.html
@@ -59,6 +59,15 @@ <h3 class="text-2xl font-bold tracking-tight text-neutral-900">{{ t('bench.headi
                                 <i data-lucide="settings-2" class="w-4 h-4 text-neutral-500"></i>
                                 <span class="text-xs font-bold uppercase tracking-wider text-neutral-600">{{ t('bench.config.section_label') }}</span>
                             </div>
+                            <button type="button"
+                                    @click="benchSettingsOpen = true"
+                                    :disabled="!benchModelId"
+                                    class="px-3 py-1.5 text-xs font-medium text-neutral-700 bg-white border border-neutral-200 hover:bg-neutral-50 hover:border-neutral-300 rounded-lg transition-all disabled:opacity-40 disabled:cursor-not-allowed flex items-center gap-1.5">
+                                <i data-lucide="sliders" class="w-3.5 h-3.5 text-neutral-500"></i>
+                                <span>Run-time settings</span>
+                                <span x-show="benchSettingsDirty()" x-cloak
+                                      class="text-[10px] px-1.5 py-0.5 rounded-full bg-amber-100 text-amber-700 font-bold uppercase tracking-wider">modified</span>
+                            </button>
                         </div>
 
                         <div class="px-6 py-5 space-y-5">
diff --git a/omlx/admin/templates/dashboard/_bench_accuracy.html b/omlx/admin/templates/dashboard/_bench_accuracy.html
index 4dac894dd..b60091b31 100644
--- a/omlx/admin/templates/dashboard/_bench_accuracy.html
+++ b/omlx/admin/templates/dashboard/_bench_accuracy.html
@@ -18,6 +18,15 @@ <h3 class="text-2xl font-bold tracking-tight text-neutral-900">{{ t('acc_bench.h
                                 <i data-lucide="settings-2" class="w-4 h-4 text-neutral-500"></i>
                                 <span class="text-xs font-bold uppercase tracking-wider text-neutral-600">{{ t('acc_bench.config.section_label') }}</span>
                             </div>
+                            <button type="button"
+                                    @click="accSettingsOpen = true"
+                                    :disabled="!accModelId"
+                                    class="px-3 py-1.5 text-xs font-medium text-neutral-700 bg-white border border-neutral-200 hover:bg-neutral-50 hover:border-neutral-300 rounded-lg transition-all disabled:opacity-40 disabled:cursor-not-allowed flex items-center gap-1.5">
+                                <i data-lucide="sliders" class="w-3.5 h-3.5 text-neutral-500"></i>
+                                <span>Run-time settings</span>
+                                <span x-show="accSettingsDirty()" x-cloak
+                                      class="text-[10px] px-1.5 py-0.5 rounded-full bg-amber-100 text-amber-700 font-bold uppercase tracking-wider">modified</span>
+                            </button>
                         </div>
 
                         <div class="px-6 py-5 space-y-5">
diff --git a/omlx/admin/templates/dashboard/_modal_bench_settings.html b/omlx/admin/templates/dashboard/_modal_bench_settings.html
new file mode 100644
index 000000000..204713f44
--- /dev/null
+++ b/omlx/admin/templates/dashboard/_modal_bench_settings.html
@@ -0,0 +1,228 @@
+{# Run-time settings modal for the benchmark tabs.
+
+   Renders the same field set as the model-settings modal, but bound to a
+   per-tab state object (benchSettings or accSettings). Changes are
+   ephemeral by default — applied to the next bench run only — with
+   explicit Save / Save-as-Profile / Run / Reset actions.
+
+   Parameters:
+     prefix             'bench' or 'acc' — used to derive every Alpine binding
+                        (e.g. benchSettingsOpen, accSettingsReset(), …).
+     run_fn             JS expression invoked by the Run benchmark button
+                        (e.g. 'startBenchmark()').
+     run_disabled_expr  JS expression that evaluates true when the Run
+                        benchmark button must be disabled.
+     placeholder        Suggested profile name shown in the inline create form.
+#}
+{% from "dashboard/_settings_fields.html" import settings_fields %}
+
+{% macro bench_settings_modal(prefix, run_fn, run_disabled_expr, placeholder='') -%}
+{%- set state           = prefix ~ 'Settings' -%}
+{%- set selected_model  = prefix ~ 'SelectedModel' -%}
+{%- set running         = prefix ~ 'Running' -%}
+{%- set open_flag       = prefix ~ 'SettingsOpen' -%}
+{%- set saving          = prefix ~ 'SettingsSaving' -%}
+{%- set recently_saved  = prefix ~ 'SettingsRecentlySaved' -%}
+{%- set status          = prefix ~ 'SettingsStatus' -%}
+{%- set save_open       = prefix ~ 'SettingsSaveProfileOpen' -%}
+{%- set new_profile     = prefix ~ 'SettingsNewProfile' -%}
+{%- set save_confirm    = prefix ~ 'SettingsSaveAsProfileConfirm()' -%}
+{%- set save_cancel     = prefix ~ 'SettingsSaveProfileOpen = false' -%}
+{%- set dirty_fn        = prefix ~ 'SettingsDirty()' -%}
+{%- set reload_fn       = prefix ~ 'SettingsRequiresReload()' -%}
+{%- set reset_fn        = prefix ~ 'SettingsReset()' -%}
+{%- set save_fn         = prefix ~ 'SettingsSave()' -%}
+{%- set save_as_fn      = prefix ~ 'SettingsSaveAsProfile()' -%}
+{%- set profiles        = prefix ~ 'Profiles' -%}
+{%- set scope           = prefix ~ 'ProfileScope' -%}
+{%- set active_profile  = prefix ~ 'ActiveProfileName' -%}
+{%- set apply_preset    = prefix ~ 'ApplyPreset' -%}
+{%- set apply_template  = prefix ~ 'ApplyTemplate' -%}
+{%- set apply_profile   = prefix ~ 'ApplyProfile' -%}
+
+<div x-show="{{ open_flag }}" x-cloak
+     class="fixed inset-0 z-50 overflow-y-auto"
+     @keydown.escape.window="{{ open_flag }} = false"
+     x-transition:enter="transition ease-out duration-200"
+     x-transition:enter-start="opacity-0"
+     x-transition:enter-end="opacity-100"
+     x-transition:leave="transition ease-in duration-150"
+     x-transition:leave-start="opacity-100"
+     x-transition:leave-end="opacity-0">
+    <div class="fixed inset-0 bg-black/50" @click="{{ open_flag }} = false"></div>
+
+    <div class="flex min-h-full items-center justify-center p-4">
+        <div class="relative bg-white rounded-2xl sm:rounded-3xl shadow-2xl w-full max-w-4xl p-4 sm:p-8"
+             @click.stop
+             x-transition:enter="transition ease-out duration-200"
+             x-transition:enter-start="opacity-0 scale-95"
+             x-transition:enter-end="opacity-100 scale-100"
+             x-transition:leave="transition ease-in duration-150"
+             x-transition:leave-start="opacity-100 scale-100"
+             x-transition:leave-end="opacity-0 scale-95">
+
+            <!-- Header -->
+            <div class="flex items-start justify-between mb-6 gap-4">
+                <div class="min-w-0">
+                    <div class="flex items-center gap-2 mb-1">
+                        <h3 class="text-xs font-bold uppercase tracking-widest text-neutral-400">Run-time Settings</h3>
+                        <span x-show="{{ dirty_fn }}" x-cloak
+                              class="text-[10px] px-2 py-0.5 rounded-full bg-amber-100 text-amber-700 font-bold uppercase tracking-wider">modified</span>
+                    </div>
+                    <h4 class="text-xl font-bold tracking-tight text-neutral-900 truncate"
+                        x-text="{{ selected_model }}?.name || {{ selected_model }}?.id || 'Select a model'"></h4>
+                    <p class="text-xs text-neutral-500 mt-1">
+                        Changes apply to the next run only. Use Save to persist, or Save as Profile to bookmark.
+                    </p>
+                </div>
+
+                <!-- Right-side header controls: Reset (text only) + X close. -->
+                <div class="flex items-center gap-1 flex-shrink-0">
+                    <button @click="{{ reset_fn }}"
+                            :disabled="!{{ dirty_fn }} || {{ running }}"
+                            class="px-3 py-1.5 text-sm font-medium text-neutral-500 hover:text-neutral-900 transition-colors disabled:opacity-40 disabled:cursor-not-allowed">
+                        Reset
+                    </button>
+                    <button @click="{{ open_flag }} = false"
+                            aria-label="Close"
+                            class="p-2 text-neutral-400 hover:text-neutral-600 hover:bg-neutral-100 rounded-full transition-all">
+                        <i data-lucide="x" class="w-5 h-5"></i>
+                    </button>
+                </div>
+            </div>
+
+            <p x-show="{{ reload_fn }}" x-cloak
+               class="text-xs text-amber-600 mb-4 flex items-start gap-2 px-4 py-2 bg-amber-50 border border-amber-200 rounded-lg">
+                <i data-lucide="alert-triangle" class="w-3.5 h-3.5 flex-shrink-0 mt-0.5"></i>
+                <span>Engine-init flags changed (TurboQuant / DFlash / MTP / IndexCache). The bench reloads the model on each run, so no extra cost.</span>
+            </p>
+
+            <!-- Quick-apply: presets / global templates / model profiles -->
+            <div class="mb-5 p-4 bg-neutral-50 rounded-xl border border-neutral-200">
+                <div class="flex items-center justify-between mb-3 gap-3 flex-wrap">
+                    <h3 class="text-xs font-bold uppercase tracking-widest text-neutral-400">Apply Profile / Template</h3>
+                    <div class="flex items-center gap-1 bg-neutral-100 rounded-lg p-0.5">
+                        <button type="button" @click="{{ scope }} = 'preset'"
+                                :class="{{ scope }} === 'preset' ? 'bg-white text-neutral-900 shadow-sm' : 'text-neutral-500 hover:text-neutral-700'"
+                                class="px-3 py-1 text-xs font-medium rounded-md transition-all">Preset</button>
+                        <button type="button" @click="{{ scope }} = 'global'"
+                                :class="{{ scope }} === 'global' ? 'bg-white text-neutral-900 shadow-sm' : 'text-neutral-500 hover:text-neutral-700'"
+                                class="px-3 py-1 text-xs font-medium rounded-md transition-all">Global Template</button>
+                        <button type="button" @click="{{ scope }} = 'model'"
+                                :class="{{ scope }} === 'model' ? 'bg-white text-neutral-900 shadow-sm' : 'text-neutral-500 hover:text-neutral-700'"
+                                class="px-3 py-1 text-xs font-medium rounded-md transition-all">Model Profile</button>
+                    </div>
+                </div>
+
+                <!-- Preset scope -->
+                <div x-show="{{ scope }} === 'preset'" x-cloak class="flex flex-wrap items-center gap-2">
+                    <template x-for="p in presets" :key="'{{ prefix }}-preset-' + p.name">
+                        <button type="button" @click="{{ apply_preset }}(p)"
+                                :title="p.description || ''"
+                                class="px-2.5 py-1 text-xs rounded-full border border-blue-200 bg-blue-50 text-blue-700 hover:bg-blue-100 transition-all"
+                                x-text="p.display_name || p.name"></button>
+                    </template>
+                    <span x-show="presets.length === 0" class="text-xs text-neutral-400 italic">no presets available</span>
+                </div>
+
+                <!-- Global template scope -->
+                <div x-show="{{ scope }} === 'global'" x-cloak class="flex flex-wrap items-center gap-2">
+                    <template x-for="tmpl in templates" :key="'{{ prefix }}-tmpl-' + tmpl.name">
+                        <button type="button" @click="{{ apply_template }}(tmpl)"
+                                :title="tmpl.description || ''"
+                                class="px-2.5 py-1 text-xs rounded-full border border-blue-200 bg-blue-50 text-blue-700 hover:bg-blue-100 transition-all"
+                                x-text="tmpl.display_name || tmpl.name"></button>
+                    </template>
+                    <span x-show="templates.length === 0" class="text-xs text-neutral-400 italic">no templates yet — create them in the model settings modal</span>
+                </div>
+
+                <!-- Model profile scope -->
+                <div x-show="{{ scope }} === 'model'" x-cloak class="flex flex-wrap items-center gap-2">
+                    <template x-for="p in {{ profiles }}" :key="'{{ prefix }}-profile-' + p.name">
+                        <button type="button" @click="{{ apply_profile }}(p)"
+                                :title="p.description || ''"
+                                :class="{{ active_profile }} === p.name
+                                    ? 'bg-emerald-500 text-white border border-emerald-500'
+                                    : 'bg-emerald-50 text-emerald-700 border border-emerald-200 hover:bg-emerald-100'"
+                                class="px-2.5 py-1 text-xs rounded-full transition-all"
+                                x-text="p.display_name || p.name"></button>
+                    </template>
+                    <span x-show="{{ profiles }}.length === 0" class="text-xs text-neutral-400 italic">no profiles for this model — use Save as Profile below to create one</span>
+                </div>
+            </div>
+
+            <!-- Settings fields -->
+            {{ settings_fields(state, selected_model, mode='panel') }}
+
+            <!-- Save-as-Profile inline name prompt -->
+            <div x-show="{{ save_open }}" x-cloak x-transition
+                 class="mt-6 p-4 bg-neutral-50 border border-neutral-200 rounded-xl space-y-2">
+                <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500">Profile name</label>
+                <input type="text" x-model="{{ new_profile }}.display_name"
+                       placeholder="{{ placeholder }}"
+                       class="w-full px-3 py-2 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent">
+                <input type="text" x-model="{{ new_profile }}.description"
+                       placeholder="Optional description"
+                       class="w-full px-3 py-2 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent">
+                <div class="flex justify-end gap-2 pt-1">
+                    <button @click="{{ save_cancel }}"
+                            class="px-3 py-1.5 text-xs text-neutral-600 hover:bg-neutral-100 rounded-lg">Cancel</button>
+                    <button @click="{{ save_confirm }}"
+                            :disabled="!{{ new_profile }}.display_name?.trim()"
+                            class="px-3 py-1.5 text-xs bg-neutral-900 text-white rounded-lg hover:bg-neutral-800 disabled:opacity-40 disabled:cursor-not-allowed">Create</button>
+                </div>
+            </div>
+
+            <!-- Action row — icons only.
+                 Persistence state is read off the buttons themselves: the Save
+                 button is dimmed when there is nothing to save, swaps to a
+                 spinner while saving, and flashes a check on success.
+                 Order (left → right): bookmark this state, persist this
+                 state, run with this state. -->
+            <div class="flex items-center justify-end gap-2 mt-8 pt-4 border-t border-neutral-100">
+
+                <!-- Save as Profile (bookmark-plus) -->
+                <button @click="{{ save_as_fn }}"
+                        :disabled="!{{ dirty_fn }} || !{{ selected_model }} || {{ running }}"
+                        aria-label="Save as Profile"
+                        title="Save as Profile…"
+                        class="p-2 rounded-lg border border-neutral-200 bg-neutral-100 hover:bg-neutral-200 text-neutral-700 transition-all disabled:opacity-40 disabled:cursor-not-allowed">
+                    <i data-lucide="bookmark-plus" class="w-6 h-6"></i>
+                </button>
+
+                <!-- Save to model settings (icon: save → spinner → check).
+                     Title/aria-label switch with state so hover and screen
+                     readers get the same context the icon implies. -->
+                <button @click="{{ save_fn }}"
+                        :disabled="!{{ dirty_fn }} || !{{ selected_model }} || {{ running }} || {{ saving }}"
+                        :class="{{ recently_saved }} ? 'text-emerald-600 border border-emerald-200 bg-emerald-50' : 'text-white bg-neutral-900 hover:bg-neutral-800'"
+                        :aria-label="{{ saving }} ? 'Saving' : ({{ recently_saved }} ? 'Saved' : ({{ dirty_fn }} ? 'Save to model settings' : 'No changes to save'))"
+                        :title="{{ saving }} ? 'Saving…' : ({{ recently_saved }} ? 'Saved' : ({{ dirty_fn }} ? 'Save to model settings' : 'No changes to save'))"
+                        class="p-2 rounded-lg transition-all disabled:opacity-40 disabled:cursor-not-allowed">
+                    <span x-show="{{ saving }}" x-cloak class="block">
+                        <svg class="animate-spin w-6 h-6" viewBox="0 0 24 24">
+                            <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" fill="none"/>
+                            <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"/>
+                        </svg>
+                    </span>
+                    <i data-lucide="check"
+                       x-show="!{{ saving }} && {{ recently_saved }}"
+                       x-cloak class="w-6 h-6"></i>
+                    <i data-lucide="save"
+                       x-show="!{{ saving }} && !{{ recently_saved }}"
+                       class="w-6 h-6"></i>
+                </button>
+
+                <!-- Run benchmark (play) — primary action visually rightmost. -->
+                <button @click="{{ run_fn }}; {{ open_flag }} = false"
+                        :disabled="{{ run_disabled_expr }}"
+                        aria-label="Run benchmark"
+                        title="Run benchmark"
+                        class="p-2 rounded-lg border border-neutral-300 bg-white hover:bg-neutral-50 hover:border-neutral-400 text-neutral-900 transition-all disabled:opacity-40 disabled:cursor-not-allowed">
+                    <i data-lucide="play" class="w-6 h-6"></i>
+                </button>
+            </div>
+        </div>
+    </div>
+</div>
+{%- endmacro %}
diff --git a/omlx/admin/templates/dashboard/_modal_model_settings.html b/omlx/admin/templates/dashboard/_modal_model_settings.html
index 31cb4d6f8..cdf4d7527 100644
--- a/omlx/admin/templates/dashboard/_modal_model_settings.html
+++ b/omlx/admin/templates/dashboard/_modal_model_settings.html
@@ -251,682 +251,9 @@ <h3 class="text-xs font-bold uppercase tracking-widest text-neutral-400">{{ t('m
                      class="fixed z-50 px-2.5 py-1 text-xs font-medium text-white bg-neutral-800 rounded-md whitespace-nowrap pointer-events-none -translate-x-1/2"
                      :style="`top: ${tip.y}px; left: ${tip.x}px`"></div>
 
-                <!-- Two-column layout -->
-                <div class="flex flex-col lg:flex-row">
-                    <!-- Left: Basic Settings -->
-                    <div class="w-full lg:w-3/5 lg:pr-6 lg:border-r border-neutral-200">
-                        <h3 class="text-xs font-bold uppercase tracking-widest text-neutral-400 mb-5">{{ t('modal.model_settings.basic_label') }}</h3>
-                        <div class="space-y-5">
-                            <!-- Row 1: Model Alias / Model Type / Reasoning Parser -->
-                            <div class="grid grid-cols-1 gap-4" :class="reasoningParsers.length > 0 ? 'sm:grid-cols-3' : 'sm:grid-cols-2'">
-                                <div>
-                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.model_alias') }}</label>
-                                    <input type="text" x-model="modelSettings.model_alias"
-                                           :placeholder="selectedModel?.id"
-                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                </div>
-                                <div>
-                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.model_type') }}</label>
-                                    <select x-model="modelSettings.model_type_override"
-                                            class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                        <option value="">{{ t('modal.model_settings.model_type_auto') }}</option>
-                                        <option value="llm">LLM</option>
-                                        <option value="vlm">VLM</option>
-                                        <option value="embedding">Embedding</option>
-                                        <option value="reranker">Reranker</option>
-                                        <option value="audio_stt">Audio STT</option>
-                                        <option value="audio_tts">Audio TTS</option>
-                                        <option value="audio_sts">Audio STS</option>
-                                    </select>
-                                </div>
-                                <div x-show="reasoningParsers.length > 0">
-                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Reasoning Parser</label>
-                                    <select x-model="modelSettings.reasoning_parser"
-                                            class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                        <option value="">None</option>
-                                        <template x-for="p in reasoningParsers" :key="p.value">
-                                            <option :value="p.value" x-text="p.label + ' (' + p.models.join(', ') + ')'"></option>
-                                        </template>
-                                    </select>
-                                </div>
-                            </div>
-
-                            <!-- Sampling settings (LLM/VLM only) -->
-                            <template x-if="!selectedModel?.model_type || selectedModel.model_type === 'llm' || selectedModel.model_type === 'vlm'">
-                                <div class="space-y-5">
-                                    <!-- Row 2: Ctx Window / Max Tokens / Temperature -->
-                                    <div class="grid grid-cols-1 sm:grid-cols-3 gap-4">
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.max_context_window') }}</label>
-                                            <input type="number" x-model.number="modelSettings.max_context_window" placeholder="{{ t('modal.model_settings.placeholder_default') }}" min="1"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                        </div>
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.max_tokens') }}</label>
-                                            <input type="number" x-model.number="modelSettings.max_tokens" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                        </div>
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.temperature') }}</label>
-                                            <input type="number" x-model.number="modelSettings.temperature" step="0.1" min="0" max="2" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                        </div>
-                                    </div>
-
-                                    <!-- Row 3: Top P / Top K / Min P -->
-                                    <div class="grid grid-cols-1 sm:grid-cols-3 gap-4">
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.top_p') }}</label>
-                                            <input type="number" x-model.number="modelSettings.top_p" step="0.05" min="0" max="1" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                        </div>
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.top_k') }}</label>
-                                            <input type="number" x-model.number="modelSettings.top_k" min="0" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                        </div>
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.min_p') }}</label>
-                                            <input type="number" x-model.number="modelSettings.min_p" step="0.01" min="0" max="1" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                        </div>
-                                    </div>
-
-                                    <!-- Row 4: Repetition Penalty / Presence Penalty -->
-                                    <div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.repetition_penalty') }}</label>
-                                            <input type="number" x-model.number="modelSettings.repetition_penalty" step="0.05" min="0" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                        </div>
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.presence_penalty') }}</label>
-                                            <input type="number" x-model.number="modelSettings.presence_penalty" step="0.05" min="-2" max="2" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                        </div>
-                                    </div>
-                                </div>
-                            </template>
-
-                            <!-- TTL (all model types) -->
-                            <div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
-                                <div>
-                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.ttl_seconds') }}</label>
-                                    <input type="number" x-model.number="modelSettings.ttl_seconds"
-                                           :placeholder="ttlPlaceholder"
-                                           :disabled="selectedModel?.pinned"
-                                           min="0" step="60"
-                                           :class="selectedModel?.pinned ? 'bg-neutral-100 text-neutral-400 cursor-not-allowed' : ''"
-                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                </div>
-                                <div class="flex items-end"
-                                     x-show="!selectedModel?.model_type || selectedModel.model_type === 'llm' || selectedModel.model_type === 'vlm'">
-                                    <button type="button" @click="loadGenerationDefaults"
-                                            :disabled="loadingGenDefaults"
-                                            class="w-full px-4 py-2.5 text-sm font-medium text-neutral-700 bg-neutral-100 hover:bg-neutral-200 rounded-xl border border-neutral-200 transition-all disabled:opacity-50">
-                                        <span x-show="!loadingGenDefaults">{{ t('modal.model_settings.load_defaults') }}</span>
-                                        <span x-show="loadingGenDefaults">{{ t('modal.model_settings.loading_defaults') }}</span>
-                                    </button>
-                                </div>
-                            </div>
-
-                            <p class="text-xs text-neutral-400 text-center"
-                               x-show="!selectedModel?.model_type || selectedModel.model_type === 'llm' || selectedModel.model_type === 'vlm'">{{ t('modal.model_settings.empty_hint') }}</p>
-                        </div>
-                    </div>
-
-                    <!-- Right: Advanced Settings -->
-                    <div class="w-full lg:w-2/5 lg:pl-6 mt-6 lg:mt-0 pt-6 lg:pt-0 border-t lg:border-t-0 border-neutral-200">
-                        <h3 class="text-xs font-bold uppercase tracking-widest text-neutral-400 mb-5">{{ t('modal.model_settings.advanced_label') }}</h3>
-                        <div class="overflow-y-auto max-h-[28rem] space-y-4 pr-1"
-                             x-show="!selectedModel?.model_type || selectedModel.model_type === 'llm' || selectedModel.model_type === 'vlm'">
-                            <!-- Enable Thinking -->
-                            <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
-                                <div class="flex items-start justify-between gap-3">
-                                    <div class="min-w-0">
-                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.enable_thinking') }}</span>
-                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.enable_thinking_hint') }}</p>
-                                    </div>
-                                    <button type="button" @click="
-                                            const effective = modelSettings.enable_thinking != null ? modelSettings.enable_thinking : (modelSettings.thinking_default || false);
-                                            modelSettings.enable_thinking = !effective;
-                                        "
-                                            :class="(modelSettings.enable_thinking != null ? modelSettings.enable_thinking : (modelSettings.thinking_default || false)) ? 'bg-black' : 'bg-neutral-200'"
-                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                        <span :class="(modelSettings.enable_thinking != null ? modelSettings.enable_thinking : (modelSettings.thinking_default || false)) ? 'translate-x-5' : 'translate-x-0'"
-                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                    </button>
-                                </div>
-                                <p x-show="modelSettings.enable_thinking == null && modelSettings.thinking_default != null"
-                                   class="text-xs text-neutral-400" x-text="'{{ t('modal.model_settings.enable_thinking_default') }}'"></p>
-                                <p x-show="modelSettings.enable_thinking != null && modelSettings.thinking_default != null && modelSettings.enable_thinking !== modelSettings.thinking_default"
-                                   class="text-xs text-amber-500 cursor-pointer hover:text-amber-600"
-                                   @click="modelSettings.enable_thinking = null"
-                                   x-text="'{{ t('modal.model_settings.enable_thinking_reset') }}'"></p>
-                            </div>
-
-                            <!-- Thinking Budget -->
-                            <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
-                                <div class="flex items-start justify-between gap-3">
-                                    <div class="min-w-0">
-                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.thinking_budget') }}</span>
-                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.thinking_budget_hint') }}</p>
-                                    </div>
-                                    <button type="button" @click="modelSettings.enableThinkingBudget = !modelSettings.enableThinkingBudget; if (!modelSettings.enableThinkingBudget) modelSettings.thinking_budget_tokens = null;"
-                                            :class="modelSettings.enableThinkingBudget ? 'bg-black' : 'bg-neutral-200'"
-                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                        <span :class="modelSettings.enableThinkingBudget ? 'translate-x-5' : 'translate-x-0'"
-                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                    </button>
-                                </div>
-                                <div x-show="modelSettings.enableThinkingBudget" x-transition class="pt-1">
-                                    <input type="number" x-model.number="modelSettings.thinking_budget_tokens" placeholder="{{ t('modal.model_settings.thinking_budget_placeholder') }}" min="1" step="100"
-                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                </div>
-                            </div>
-
-                            <!-- Limit Tool Result Tokens -->
-                            <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
-                                <div class="flex items-start justify-between gap-3">
-                                    <div class="min-w-0">
-                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.limit_tool_result') }}</span>
-                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.limit_tool_result_hint') }}</p>
-                                    </div>
-                                    <button type="button" @click="modelSettings.enableToolResultLimit = !modelSettings.enableToolResultLimit; if (!modelSettings.enableToolResultLimit) modelSettings.max_tool_result_tokens = null;"
-                                            :class="modelSettings.enableToolResultLimit ? 'bg-black' : 'bg-neutral-200'"
-                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                        <span :class="modelSettings.enableToolResultLimit ? 'translate-x-5' : 'translate-x-0'"
-                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                    </button>
-                                </div>
-                                <div x-show="modelSettings.enableToolResultLimit" x-transition class="pt-1">
-                                    <input type="number" x-model.number="modelSettings.max_tool_result_tokens" placeholder="{{ t('modal.model_settings.limit_tool_placeholder') }}" min="100" step="100"
-                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                </div>
-                            </div>
-
-                            <!-- Force Sampling -->
-                            <div class="p-4 bg-neutral-50 rounded-xl">
-                                <div class="flex items-start justify-between gap-3">
-                                    <div class="min-w-0">
-                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.force_sampling') }}</span>
-                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.force_sampling_hint') }}</p>
-                                    </div>
-                                    <button type="button" @click="modelSettings.force_sampling = !modelSettings.force_sampling"
-                                            :class="modelSettings.force_sampling ? 'bg-black' : 'bg-neutral-200'"
-                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                        <span :class="modelSettings.force_sampling ? 'translate-x-5' : 'translate-x-0'"
-                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                    </button>
-                                </div>
-                            </div>
-
-                            <!-- Trust Remote Code (security-sensitive) -->
-                            <div class="p-4 bg-red-50 rounded-xl border border-red-100">
-                                <div class="flex items-start justify-between gap-3">
-                                    <div class="min-w-0">
-                                        <span class="text-sm font-medium text-red-700">{{ t('modal.model_settings.trust_remote_code') }}</span>
-                                        <p class="text-xs text-red-600 mt-0.5">{{ t('modal.model_settings.trust_remote_code_hint') }}</p>
-                                    </div>
-                                    <button type="button" @click="modelSettings.trust_remote_code = !modelSettings.trust_remote_code"
-                                            :class="modelSettings.trust_remote_code ? 'bg-red-600' : 'bg-neutral-200'"
-                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-red-500">
-                                        <span :class="modelSettings.trust_remote_code ? 'translate-x-5' : 'translate-x-0'"
-                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                    </button>
-                                </div>
-                            </div>
-
-                            <!-- Chat Template Kwargs -->
-                            <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
-                                <div class="flex items-center justify-between">
-                                    <div>
-                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.chat_template_kwargs') }}</span>
-                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.chat_template_kwargs_hint') }}</p>
-                                    </div>
-                                    <!-- Add dropdown -->
-                                    <div class="relative" x-data="{ addOpen: false }">
-                                        <button type="button" @click="addOpen = !addOpen"
-                                                class="text-xs text-neutral-500 hover:text-neutral-700 font-medium transition-colors">{{ t('modal.model_settings.add_kwarg') }}</button>
-                                        <div x-show="addOpen" @click.away="addOpen = false" x-transition
-                                             class="absolute right-0 mt-1 w-44 bg-white border border-neutral-200 rounded-lg shadow-lg z-10 py-1">
-                                            <button type="button"
-                                                    x-show="!modelSettings.ctKwargEntries.some(e => e.type === 'enable_thinking')"
-                                                    @click="modelSettings.ctKwargEntries.push({type: 'enable_thinking', value: 'true', force: false}); addOpen = false"
-                                                    class="w-full text-left px-3 py-1.5 text-sm text-neutral-700 hover:bg-neutral-50 transition-colors">{{ t('modal.model_settings.kwarg_enable_thinking') }}</button>
-                                            <button type="button"
-                                                    x-show="!modelSettings.ctKwargEntries.some(e => e.type === 'reasoning_effort')"
-                                                    @click="modelSettings.ctKwargEntries.push({type: 'reasoning_effort', value: 'low', force: false}); addOpen = false"
-                                                    class="w-full text-left px-3 py-1.5 text-sm text-neutral-700 hover:bg-neutral-50 transition-colors">{{ t('modal.model_settings.kwarg_reasoning_effort') }}</button>
-                                            <button type="button"
-                                                    @click="modelSettings.ctKwargEntries.push({type: 'custom', key: '', value: '', force: false}); addOpen = false"
-                                                    class="w-full text-left px-3 py-1.5 text-sm text-neutral-700 hover:bg-neutral-50 transition-colors">{{ t('modal.model_settings.kwarg_custom') }}</button>
-                                        </div>
-                                    </div>
-                                </div>
-
-                                <!-- Dynamic entries -->
-                                <template x-for="(entry, idx) in modelSettings.ctKwargEntries" :key="idx">
-                                    <div class="space-y-2">
-                                        <!-- Row 1: Label + Remove -->
-                                        <div class="flex items-center justify-between">
-                                            <template x-if="entry.type === 'enable_thinking'">
-                                                <span class="text-xs font-bold uppercase tracking-wider text-neutral-500">{{ t('modal.model_settings.kwarg_enable_thinking') }}</span>
-                                            </template>
-                                            <template x-if="entry.type === 'reasoning_effort'">
-                                                <span class="text-xs font-bold uppercase tracking-wider text-neutral-500">{{ t('modal.model_settings.kwarg_reasoning_effort') }}</span>
-                                            </template>
-                                            <template x-if="entry.type === 'custom'">
-                                                <span class="text-xs font-bold uppercase tracking-wider text-neutral-500">{{ t('modal.model_settings.kwarg_custom') }}</span>
-                                            </template>
-                                            <button type="button" @click="modelSettings.ctKwargEntries.splice(idx, 1)"
-                                                    class="p-1 text-neutral-400 hover:text-red-500 transition-colors">
-                                                <svg xmlns="http://www.w3.org/2000/svg" class="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M18 6 6 18"/><path d="m6 6 12 12"/></svg>
-                                            </button>
-                                        </div>
-                                        <!-- Row 2: Value + Force -->
-                                        <div class="flex gap-2 items-center">
-                                            <template x-if="entry.type === 'enable_thinking'">
-                                                <select x-model="entry.value"
-                                                        class="flex-1 px-3 py-1.5 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                    <option value="true">true</option>
-                                                    <option value="false">false</option>
-                                                </select>
-                                            </template>
-                                            <template x-if="entry.type === 'reasoning_effort'">
-                                                <select x-model="entry.value"
-                                                        class="flex-1 px-3 py-1.5 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                    <option value="low">{{ t('modal.model_settings.kwarg_effort_low') }}</option>
-                                                    <option value="medium">{{ t('modal.model_settings.kwarg_effort_medium') }}</option>
-                                                    <option value="high">{{ t('modal.model_settings.kwarg_effort_high') }}</option>
-                                                </select>
-                                            </template>
-                                            <template x-if="entry.type !== 'custom'">
-                                                <label class="flex items-center gap-1 flex-shrink-0" title="{{ t('modal.model_settings.kwarg_force_tooltip') }}">
-                                                    <input type="checkbox" x-model="entry.force"
-                                                           class="w-3.5 h-3.5 rounded border-neutral-300 text-neutral-900 focus:ring-neutral-900">
-                                                    <span class="text-xs text-neutral-400">{{ t('modal.model_settings.kwarg_force') }}</span>
-                                                </label>
-                                            </template>
-                                        </div>
-                                        <!-- Row 2b: Custom key/value + Force (stacked) -->
-                                        <template x-if="entry.type === 'custom'">
-                                            <div class="space-y-2">
-                                                <input type="text" x-model="entry.key" placeholder="{{ t('modal.model_settings.kwarg_custom_key_placeholder') }}"
-                                                       class="w-full px-3 py-1.5 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                                <div class="flex gap-2 items-center">
-                                                    <input type="text" x-model="entry.value" placeholder="{{ t('modal.model_settings.kwarg_custom_value_placeholder') }}"
-                                                           class="flex-1 px-3 py-1.5 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all min-w-0">
-                                                    <label class="flex items-center gap-1 flex-shrink-0" title="{{ t('modal.model_settings.kwarg_force_tooltip') }}">
-                                                        <input type="checkbox" x-model="entry.force"
-                                                               class="w-3.5 h-3.5 rounded border-neutral-300 text-neutral-900 focus:ring-neutral-900">
-                                                        <span class="text-xs text-neutral-400">{{ t('modal.model_settings.kwarg_force') }}</span>
-                                                    </label>
-                                                </div>
-                                            </div>
-                                        </template>
-                                    </div>
-                                </template>
-
-                                <!-- Empty state -->
-                                <p x-show="modelSettings.ctKwargEntries.length === 0" class="text-xs text-neutral-400 text-center py-1">{{ t('modal.model_settings.no_kwargs') }}</p>
-                            </div>
-
-                            <!-- Experimental Features -->
-                            <h4 class="text-xs font-bold uppercase tracking-widest text-neutral-400 mb-3">{{ t('modal.model_settings.experimental_label') }}</h4>
-
-                                <!-- TurboQuant KV Cache -->
-                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3 mb-3">
-                                    <div class="flex items-start justify-between gap-3">
-                                        <div class="min-w-0">
-                                            <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.turboquant_kv') }}</span>
-                                            <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.turboquant_kv_hint') }}</p>
-                                        </div>
-                                        <button type="button"
-                                                @click="if (!modelSettings.is_paroquant && !modelSettings.mtp_enabled && !modelSettings.vlm_mtp_enabled) modelSettings.turboquant_kv_enabled = !modelSettings.turboquant_kv_enabled"
-                                                :disabled="modelSettings.is_paroquant || modelSettings.mtp_enabled || modelSettings.vlm_mtp_enabled"
-                                                :title="modelSettings.is_paroquant ? modelSettings.paroquant_reason : (modelSettings.mtp_enabled || modelSettings.vlm_mtp_enabled ? '{{ t('modal.model_settings.mtp_conflict_turboquant') }}' : '')"
-                                                :class="[
-                                                    modelSettings.turboquant_kv_enabled ? 'bg-black' : 'bg-neutral-200',
-                                                    (modelSettings.is_paroquant || modelSettings.mtp_enabled || modelSettings.vlm_mtp_enabled) ? 'opacity-40 cursor-not-allowed' : ''
-                                                ]"
-                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                            <span :class="modelSettings.turboquant_kv_enabled ? 'translate-x-5' : 'translate-x-0'"
-                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                        </button>
-                                    </div>
-                                    <div x-show="modelSettings.turboquant_kv_enabled" x-transition class="pt-1">
-                                        <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.turboquant_kv_bits_label') }}</label>
-                                        <select x-model="modelSettings.turboquant_kv_bits"
-                                                class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                            <option value="2">2-bit</option>
-                                            <option value="2.5">2.5-bit</option>
-                                            <option value="3">3-bit</option>
-                                            <option value="3.5">3.5-bit</option>
-                                            <option value="4">4-bit</option>
-                                            <option value="6">6-bit</option>
-                                            <option value="8">8-bit</option>
-                                        </select>
-                                    </div>
-                                </div>
-
-                                <!-- IndexCache (DSA models only) -->
-                                <template x-if="DSA_MODEL_TYPES.has(selectedModel?.config_model_type || '')">
-                                    <div class="p-4 bg-neutral-50 rounded-xl space-y-3 mb-3">
-                                        <div class="flex items-start justify-between gap-3">
-                                            <div class="min-w-0">
-                                                <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.index_cache') }}</span>
-                                                <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.index_cache_hint') }} (<a href="https://github.com/THUDM/IndexCache" target="_blank" rel="noopener" class="text-blue-500 hover:text-blue-700 underline">GitHub</a>)</p>
-                                            </div>
-                                            <button type="button"
-                                                    @click="if (!modelSettings.is_paroquant) { modelSettings.enableIndexCache = !modelSettings.enableIndexCache; if (!modelSettings.enableIndexCache) modelSettings.index_cache_freq = null; }"
-                                                    :disabled="modelSettings.is_paroquant"
-                                                    :title="modelSettings.is_paroquant ? modelSettings.paroquant_reason : ''"
-                                                    :class="[
-                                                        modelSettings.enableIndexCache ? 'bg-black' : 'bg-neutral-200',
-                                                        modelSettings.is_paroquant ? 'opacity-40 cursor-not-allowed' : ''
-                                                    ]"
-                                                    class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                                <span :class="modelSettings.enableIndexCache ? 'translate-x-5' : 'translate-x-0'"
-                                                      class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                            </button>
-                                        </div>
-                                        <div x-show="modelSettings.enableIndexCache" x-transition class="pt-1">
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.index_cache_freq') }}</label>
-                                            <input type="number" x-model.number="modelSettings.index_cache_freq" min="2" max="16" step="1" placeholder="4"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                        </div>
-                                    </div>
-                                </template>
-
-                                <!-- SpecPrefill -->
-                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
-                                    <div class="flex items-start justify-between gap-3">
-                                        <div class="min-w-0">
-                                            <span class="text-sm font-medium text-neutral-700">SpecPrefill</span>
-                                            <p class="text-xs text-neutral-500 mt-0.5">Attention-based sparse prefill for MoE/hybrid models. (<a href="https://arxiv.org/abs/2502.02789" target="_blank" rel="noopener" class="text-blue-500 hover:text-blue-700 underline">Paper</a>) (<a href="https://huggingface.co/Thump604/specprefill-paper" target="_blank" rel="noopener" class="text-blue-500 hover:text-blue-700 underline">HuggingFace</a>)</p>
-                                        </div>
-                                        <button type="button"
-                                                @click="if (!modelSettings.is_paroquant && !modelSettings.vlm_mtp_enabled) modelSettings.specprefill_enabled = !modelSettings.specprefill_enabled"
-                                                :disabled="modelSettings.is_paroquant || modelSettings.vlm_mtp_enabled"
-                                                :title="modelSettings.is_paroquant ? modelSettings.paroquant_reason : (modelSettings.vlm_mtp_enabled ? '{{ t('modal.model_settings.vlm_mtp_conflict') }}' : '')"
-                                                :class="[
-                                                    modelSettings.specprefill_enabled ? 'bg-black' : 'bg-neutral-200',
-                                                    (modelSettings.is_paroquant || modelSettings.vlm_mtp_enabled) ? 'opacity-40 cursor-not-allowed' : ''
-                                                ]"
-                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                            <span :class="modelSettings.specprefill_enabled ? 'translate-x-5' : 'translate-x-0'"
-                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                        </button>
-                                    </div>
-                                    <div x-show="modelSettings.specprefill_enabled" x-transition class="space-y-3 pt-1">
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Draft Model</label>
-                                            <select x-model="modelSettings.specprefill_draft_model"
-                                                    class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                <option value="">Select draft model...</option>
-                                                <template x-for="m in models.filter(m => m.id !== selectedModel?.id)" :key="m.id">
-                                                    <option :value="m.model_path || m.id" x-text="m.id" :selected="modelSettings.specprefill_draft_model === (m.model_path || m.id)"></option>
-                                                </template>
-                                            </select>
-                                            <p class="text-xs text-neutral-400 mt-1">Small model sharing tokenizer with target (e.g. Qwen3.5-0.8B for 35B)</p>
-                                        </div>
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Keep Rate</label>
-                                            <select x-model="modelSettings.specprefill_keep_pct"
-                                                    class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                <option value="0.1">10% — Aggressive (~5-7x, some quality loss)</option>
-                                                <option value="0.2">20% — Balanced (~3x, recommended)</option>
-                                                <option value="0.25">25% — Conservative+ (~2.5x)</option>
-                                                <option value="0.3">30% — Conservative (~2.2x)</option>
-                                                <option value="0.4">40% — Mild (~1.8x)</option>
-                                                <option value="0.5">50% — Minimal (~1.5x)</option>
-                                            </select>
-                                        </div>
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Threshold (tokens)</label>
-                                            <input type="number" x-model.number="modelSettings.specprefill_threshold" min="1024" max="131072" step="1024" placeholder="8192"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                            <p class="text-xs text-neutral-400 mt-1">Min tokens to trigger (shorter prompts use full prefill)</p>
-                                        </div>
-                                    </div>
-                                </div>
-
-                                <!-- DFlash -->
-                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
-                                    <div class="flex items-start justify-between gap-3">
-                                        <div class="min-w-0">
-                                            <span class="text-sm font-medium text-neutral-700">DFlash</span>
-                                            <p class="text-xs text-neutral-500 mt-0.5">Block diffusion speculative decoding for 3-4x faster generation. Supports Qwen (3, 3.5, 3.6) and Gemma4 model families. Requires a DFlash draft model checkpoint.<br><strong>Single-stream only: requests run one at a time.</strong><br>* MLX impl by bstnxbt(<a href="https://github.com/bstnxbt/dflash-mlx" target="_blank" rel="noopener" class="text-blue-500 hover:text-blue-700 underline">GitHub</a>)</p>
-                                            <p x-show="!modelSettings.dflash_compatible && modelSettings.dflash_compatibility_reason"
-                                               class="text-xs text-amber-600 mt-1"
-                                               x-text="modelSettings.dflash_compatibility_reason"></p>
-                                        </div>
-                                        <button type="button"
-                                                @click="if (modelSettings.dflash_compatible && !modelSettings.mtp_enabled && !modelSettings.vlm_mtp_enabled) modelSettings.dflash_enabled = !modelSettings.dflash_enabled"
-                                                :disabled="!modelSettings.dflash_compatible || modelSettings.mtp_enabled || modelSettings.vlm_mtp_enabled"
-                                                :title="modelSettings.dflash_compatible ? (modelSettings.mtp_enabled || modelSettings.vlm_mtp_enabled ? '{{ t('modal.model_settings.mtp_conflict') }}' : '') : (modelSettings.dflash_compatibility_reason || 'Unsupported model')"
-                                                :class="[
-                                                    modelSettings.dflash_enabled ? 'bg-black' : 'bg-neutral-200',
-                                                    (!modelSettings.dflash_compatible || modelSettings.mtp_enabled || modelSettings.vlm_mtp_enabled) ? 'opacity-40 cursor-not-allowed' : ''
-                                                ]"
-                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                            <span :class="modelSettings.dflash_enabled ? 'translate-x-5' : 'translate-x-0'"
-                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                        </button>
-                                    </div>
-                                    <div x-show="modelSettings.dflash_enabled" x-transition class="space-y-3 pt-1">
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Draft Model</label>
-                                            <select x-model="modelSettings.dflash_draft_model"
-                                                    class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                <option value="">Select draft model...</option>
-                                                <template x-for="m in models.filter(m => m.id !== selectedModel?.id)" :key="m.id">
-                                                    <option :value="m.model_path || m.id" x-text="m.id" :selected="modelSettings.dflash_draft_model === (m.model_path || m.id)"></option>
-                                                </template>
-                                            </select>
-                                            <p class="text-xs text-neutral-400 mt-1">DFlash draft checkpoint (e.g. z-lab/Qwen3-4B-DFlash-b16, z-lab/gemma-4-26B-A4B-it-DFlash). Note: -DFlash suffix only; -assistant variants are for MTP.</p>
-                                        </div>
-                                        <div class="bg-neutral-50 rounded-xl">
-                                            <div class="flex items-start justify-between gap-3">
-                                                <div class="min-w-0">
-                                                    <span class="text-sm font-medium text-neutral-700">Quantization</span>
-                                                    <p class="text-xs text-neutral-500 mt-0.5">Enable quantization for the draft model (weight, activation bits &amp; group size).</p>
-                                                </div>
-                                                <button type="button" @click="modelSettings.dflash_draft_quant_enabled = !modelSettings.dflash_draft_quant_enabled"
-                                                        :class="modelSettings.dflash_draft_quant_enabled ? 'bg-black' : 'bg-neutral-200'"
-                                                        class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                                    <span :class="modelSettings.dflash_draft_quant_enabled ? 'translate-x-5' : 'translate-x-0'"
-                                                          class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                                </button>
-                                            </div>
-                                        </div>
-                                        <div class="grid grid-cols-3 gap-3 pl-4" x-show="modelSettings.dflash_draft_quant_enabled" x-transition>
-                                            <div>
-                                                <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Weight Bits</label>
-                                                <select x-model="modelSettings.dflash_draft_quant_weight_bits"
-                                                        class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                    <option value="2">2-bit</option>
-                                                    <option value="4" selected>4-bit</option>
-                                                    <option value="8">8-bit</option>
-                                                </select>
-                                            </div>
-                                            <div>
-                                                <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Activation Bits</label>
-                                                <select x-model="modelSettings.dflash_draft_quant_activation_bits"
-                                                        class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                    <option value="16" selected>16-bit</option>
-                                                    <option value="32">32-bit</option>
-                                                </select>
-                                            </div>
-                                            <div>
-                                                <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Group Size</label>
-                                                <select x-model="modelSettings.dflash_draft_quant_group_size"
-                                                        class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                    <option value="32">32</option>
-                                                    <option value="64" selected>64</option>
-                                                    <option value="128">128</option>
-                                                </select>
-                                            </div>
-                                        </div>
-                                        <div>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Max Context (fallback threshold)</label>
-                                            <input type="number" x-model.number="modelSettings.dflash_max_ctx" min="1" step="1024" placeholder="unlimited"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                            <p class="text-xs text-neutral-400 mt-1">Prompts at or above this token count switch to BatchedEngine. Leave empty for unlimited.</p>
-                                        </div>
-                                        <div class="pt-2 border-t border-neutral-200 space-y-3">
-                                            <p class="text-xs font-bold uppercase tracking-wider text-neutral-500">Long-context tuning</p>
-                                            <div class="grid grid-cols-2 gap-3">
-                                                <div>
-                                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Draft window size</label>
-                                                    <input type="number" x-model.number="modelSettings.dflash_draft_window_size" min="1" step="128" placeholder="1024"
-                                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                                    <p class="text-xs text-neutral-400 mt-1">Draft model sliding-attention window. Helps stabilise acceptance on long contexts. Leave empty for dflash default (1024).</p>
-                                                </div>
-                                                <div>
-                                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Draft sink size</label>
-                                                    <input type="number" x-model.number="modelSettings.dflash_draft_sink_size" min="0" step="8" placeholder="64"
-                                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                                    <p class="text-xs text-neutral-400 mt-1">Attention-sink tokens always kept regardless of window. Leave empty for dflash default (64).</p>
-                                                </div>
-                                            </div>
-                                            <div>
-                                                <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Verify mode</label>
-                                                <select x-model="modelSettings.dflash_verify_mode"
-                                                        class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                    <option value="adaptive">adaptive (default)</option>
-                                                    <option value="dflash">dflash</option>
-                                                    <option value="ddtree">ddtree</option>
-                                                    <option value="off">off</option>
-                                                </select>
-                                                <p class="text-xs text-neutral-400 mt-1">Verifier algorithm. "adaptive" shrinks block size when acceptance drops; "off" disables speculative verify.</p>
-                                            </div>
-                                        </div>
-                                        <div class="flex items-start justify-between gap-3 pt-2 border-t border-neutral-200">
-                                            <div class="min-w-0">
-                                                <span class="text-sm font-medium text-neutral-700">In-memory cache</span>
-                                                <p class="text-xs text-neutral-500 mt-0.5">DFlash L1 prefix snapshot cache in RAM. Speeds up multi-turn chats with shared prefixes.</p>
-                                            </div>
-                                            <button type="button" @click="modelSettings.dflash_in_memory_cache = !modelSettings.dflash_in_memory_cache; if (!modelSettings.dflash_in_memory_cache) modelSettings.dflash_ssd_cache = false"
-                                                    :class="modelSettings.dflash_in_memory_cache ? 'bg-black' : 'bg-neutral-200'"
-                                                    class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                                <span :class="modelSettings.dflash_in_memory_cache ? 'translate-x-5' : 'translate-x-0'"
-                                                      class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                            </button>
-                                        </div>
-                                        <div x-show="modelSettings.dflash_in_memory_cache" x-transition>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">In-memory cache max entries</label>
-                                            <input type="number" x-model.number="modelSettings.dflash_in_memory_cache_max_entries" min="1" max="128" step="1" placeholder="4"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                            <p class="text-xs text-neutral-400 mt-1">Maximum number of prefix snapshots kept in L1 cache. Each entry stores KV + draft GDN state for one conversation prefix.</p>
-                                        </div>
-                                        <div x-show="modelSettings.dflash_in_memory_cache" x-transition>
-                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">In-memory cache size (GiB)</label>
-                                            <input type="number" x-model.number="modelSettings.dflash_in_memory_cache_max_gib" min="1" max="256" step="1" placeholder="8"
-                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
-                                            <p class="text-xs text-neutral-400 mt-1">Byte budget for L1 snapshots; LRU evicts when exceeded.</p>
-                                        </div>
-                                        <div class="flex items-start justify-between gap-3">
-                                            <div class="min-w-0">
-                                                <span class="text-sm font-medium text-neutral-700">SSD cache</span>
-                                                <p class="text-xs text-neutral-500 mt-0.5">L2 spill of evicted L1 entries to disk. Uses the oMLX paged SSD cache directory (<code>dflash_l2/</code>).</p>
-                                                <p x-show="!modelSettings.dflash_ssd_cache_available" class="text-xs text-amber-600 mt-1">Enable oMLX paged SSD cache first (<code>--paged-ssd-cache-dir</code>).</p>
-                                                <p x-show="modelSettings.dflash_ssd_cache_available && !modelSettings.dflash_in_memory_cache" class="text-xs text-amber-600 mt-1">Requires in-memory cache to be enabled.</p>
-                                            </div>
-                                            <button type="button"
-                                                    @click="if (modelSettings.dflash_ssd_cache_available && modelSettings.dflash_in_memory_cache) modelSettings.dflash_ssd_cache = !modelSettings.dflash_ssd_cache"
-                                                    :disabled="!modelSettings.dflash_ssd_cache_available || !modelSettings.dflash_in_memory_cache"
-                                                    :class="[
-                                                        modelSettings.dflash_ssd_cache ? 'bg-black' : 'bg-neutral-200',
-                                                        (!modelSettings.dflash_ssd_cache_available || !modelSettings.dflash_in_memory_cache) ? 'opacity-40 cursor-not-allowed' : ''
-                                                    ]"
-                                                    class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                                <span :class="modelSettings.dflash_ssd_cache ? 'translate-x-5' : 'translate-x-0'"
-                                                      class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                            </button>
-                                        </div>
-                                    </div>
-                                </div>
-
-                                <!-- Native MTP (mlx-lm PR 990 + PR 15 monkey-patch) -->
-                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3 mt-3">
-                                    <div class="flex items-start justify-between gap-3">
-                                        <div class="min-w-0">
-                                            <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.mtp') }}</span>
-                                            <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.mtp_hint') | safe }}</p>
-                                            <p x-show="!modelSettings.mtp_compatible && modelSettings.mtp_compatibility_reason"
-                                               class="text-xs text-amber-600 mt-1"
-                                               x-text="modelSettings.mtp_compatibility_reason"></p>
-                                            <p x-show="modelSettings.mtp_compatible && (modelSettings.dflash_enabled || modelSettings.turboquant_kv_enabled)"
-                                               class="text-xs text-amber-600 mt-1">{{ t('modal.model_settings.mtp_conflict') }}</p>
-                                        </div>
-                                        <button type="button"
-                                                @click="if ((modelSettings.mtp_compatible || modelSettings.mtp_enabled) && !modelSettings.dflash_enabled && !modelSettings.turboquant_kv_enabled && !modelSettings.vlm_mtp_enabled) modelSettings.mtp_enabled = !modelSettings.mtp_enabled"
-                                                :disabled="(!modelSettings.mtp_compatible && !modelSettings.mtp_enabled) || modelSettings.dflash_enabled || modelSettings.turboquant_kv_enabled || modelSettings.vlm_mtp_enabled"
-                                                :title="(!modelSettings.mtp_compatible && !modelSettings.mtp_enabled)
-                                                            ? (modelSettings.mtp_compatibility_reason || 'Unsupported model')
-                                                            : (modelSettings.dflash_enabled
-                                                                ? '{{ t('modal.model_settings.mtp_conflict_dflash') }}'
-                                                                : (modelSettings.turboquant_kv_enabled
-                                                                    ? '{{ t('modal.model_settings.mtp_conflict_turboquant') }}'
-                                                                    : (modelSettings.vlm_mtp_enabled
-                                                                        ? '{{ t('modal.model_settings.vlm_mtp_conflict') }}'
-                                                                        : '')))"
-                                                :class="[
-                                                    modelSettings.mtp_enabled ? 'bg-black' : 'bg-neutral-200',
-                                                    ((!modelSettings.mtp_compatible && !modelSettings.mtp_enabled) || modelSettings.dflash_enabled || modelSettings.turboquant_kv_enabled || modelSettings.vlm_mtp_enabled) ? 'opacity-40 cursor-not-allowed' : ''
-                                                ]"
-                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                            <span :class="modelSettings.mtp_enabled ? 'translate-x-5' : 'translate-x-0'"
-                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                        </button>
-                                    </div>
-                                </div>
-
-                                <!-- VLM MTP (mlx-vlm 191d7c8+, gemma4_assistant drafter) -->
-                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3 mt-3">
-                                    <div class="flex items-start justify-between gap-3">
-                                        <div class="min-w-0">
-                                            <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.vlm_mtp') }}</span>
-                                            <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.vlm_mtp_hint') | safe }}</p>
-                                            <p x-show="modelSettings.dflash_enabled || modelSettings.specprefill_enabled || modelSettings.mtp_enabled || modelSettings.turboquant_kv_enabled"
-                                               class="text-xs text-amber-600 mt-1">{{ t('modal.model_settings.vlm_mtp_conflict') }}</p>
-                                        </div>
-                                        <button type="button"
-                                                @click="if (!modelSettings.is_paroquant && !modelSettings.dflash_enabled && !modelSettings.specprefill_enabled && !modelSettings.mtp_enabled && !modelSettings.turboquant_kv_enabled) modelSettings.vlm_mtp_enabled = !modelSettings.vlm_mtp_enabled"
-                                                :disabled="modelSettings.is_paroquant || modelSettings.dflash_enabled || modelSettings.specprefill_enabled || modelSettings.mtp_enabled || modelSettings.turboquant_kv_enabled"
-                                                :title="modelSettings.is_paroquant ? modelSettings.paroquant_reason : ''"
-                                                :class="[
-                                                    modelSettings.vlm_mtp_enabled ? 'bg-black' : 'bg-neutral-200',
-                                                    (modelSettings.is_paroquant || modelSettings.dflash_enabled || modelSettings.specprefill_enabled || modelSettings.mtp_enabled || modelSettings.turboquant_kv_enabled) ? 'opacity-40 cursor-not-allowed' : ''
-                                                ]"
-                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
-                                            <span :class="modelSettings.vlm_mtp_enabled ? 'translate-x-5' : 'translate-x-0'"
-                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
-                                        </button>
-                                    </div>
-                                    <div x-show="modelSettings.vlm_mtp_enabled" class="space-y-2">
-                                        <label class="block text-xs font-medium text-neutral-600">{{ t('modal.model_settings.vlm_mtp_draft_model') }}</label>
-                                        <select x-model="modelSettings.vlm_mtp_draft_model"
-                                                class="w-full px-3 py-2 text-sm border border-neutral-200 rounded-lg focus:outline-none focus:ring-2 focus:ring-black bg-white">
-                                            <option value="">{{ t('modal.model_settings.vlm_mtp_draft_model_placeholder') }}</option>
-                                            <template x-for="m in models.filter(m => /assistant/i.test(m.id))" :key="m.id">
-                                                <option :value="m.id" x-text="m.id"></option>
-                                            </template>
-                                        </select>
-                                        <label class="block text-xs font-medium text-neutral-600 mt-2">{{ t('modal.model_settings.vlm_mtp_block_size') }}</label>
-                                        <input type="number"
-                                               min="1"
-                                               max="16"
-                                               x-model.number="modelSettings.vlm_mtp_draft_block_size"
-                                               placeholder="4"
-                                               class="w-full px-3 py-2 text-sm border border-neutral-200 rounded-lg focus:outline-none focus:ring-2 focus:ring-black" />
-                                    </div>
-                                </div>
-                        </div>
-                    </div>
-                </div>
+                <!-- Settings fields (shared with bench-tab inline panels). -->
+                {% from "dashboard/_settings_fields.html" import settings_fields %}
+                {{ settings_fields('modelSettings') }}
 
                 <!-- Actions -->
                 <div class="flex items-center justify-end gap-3 mt-8">
diff --git a/omlx/admin/templates/dashboard/_settings_fields.html b/omlx/admin/templates/dashboard/_settings_fields.html
new file mode 100644
index 000000000..5f55e5e97
--- /dev/null
+++ b/omlx/admin/templates/dashboard/_settings_fields.html
@@ -0,0 +1,695 @@
+{#
+  Shared settings-fields macro for the model-settings modal and the bench-tab
+  inline panels. Renders the Basic + Advanced columns. Profile/template
+  management, modal chrome, and the Save/Cancel actions stay in the parent
+  template.
+
+  Args:
+    state            — Alpine state object holding the form values
+                       (e.g. "modelSettings", "benchSettings", "accSettings").
+    selected_model   — Alpine expression for the model currently being edited
+                       (e.g. "selectedModel", "benchSelectedModel").
+    mode             — "modal" or "panel". Modal-only chrome (the "Load
+                       Defaults" button) is not rendered unless mode is "modal",
+                       so panels need not define loadGenerationDefaults / loadingGenDefaults.
+#}
+{% macro settings_fields(state, selected_model='selectedModel', mode='modal') %}
+                <div class="flex flex-col lg:flex-row">
+                    <!-- Left: Basic Settings -->
+                    <div class="w-full lg:w-3/5 lg:pr-6 lg:border-r border-neutral-200">
+                        <h3 class="text-xs font-bold uppercase tracking-widest text-neutral-400 mb-5">{{ t('modal.model_settings.basic_label') }}</h3>
+                        <div class="space-y-5">
+                            <!-- Row 1: Model Alias / Model Type / Reasoning Parser -->
+                            <div class="grid grid-cols-1 gap-4" :class="reasoningParsers.length > 0 ? 'sm:grid-cols-3' : 'sm:grid-cols-2'">
+                                <div>
+                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.model_alias') }}</label>
+                                    <input type="text" x-model="{{ state }}.model_alias"
+                                           :placeholder="{{ selected_model }}?.id"
+                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                </div>
+                                <div>
+                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.model_type') }}</label>
+                                    <select x-model="{{ state }}.model_type_override"
+                                            class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                        <option value="">{{ t('modal.model_settings.model_type_auto') }}</option>
+                                        <option value="llm">LLM</option>
+                                        <option value="vlm">VLM</option>
+                                        <option value="embedding">Embedding</option>
+                                        <option value="reranker">Reranker</option>
+                                        <option value="audio_stt">Audio STT</option>
+                                        <option value="audio_tts">Audio TTS</option>
+                                        <option value="audio_sts">Audio STS</option>
+                                    </select>
+                                </div>
+                                <div x-show="reasoningParsers.length > 0">
+                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Reasoning Parser</label>
+                                    <select x-model="{{ state }}.reasoning_parser"
+                                            class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                        <option value="">None</option>
+                                        <template x-for="p in reasoningParsers" :key="p.value">
+                                            <option :value="p.value" x-text="p.label + ' (' + p.models.join(', ') + ')'"></option>
+                                        </template>
+                                    </select>
+                                </div>
+                            </div>
+
+                            <!-- Sampling settings (LLM/VLM only) -->
+                            <template x-if="!{{ selected_model }}?.model_type || {{ selected_model }}.model_type === 'llm' || {{ selected_model }}.model_type === 'vlm'">
+                                <div class="space-y-5">
+                                    <!-- Row 2: Ctx Window / Max Tokens / Temperature -->
+                                    <div class="grid grid-cols-1 sm:grid-cols-3 gap-4">
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.max_context_window') }}</label>
+                                            <input type="number" x-model.number="{{ state }}.max_context_window" placeholder="{{ t('modal.model_settings.placeholder_default') }}" min="1"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                        </div>
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.max_tokens') }}</label>
+                                            <input type="number" x-model.number="{{ state }}.max_tokens" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                        </div>
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.temperature') }}</label>
+                                            <input type="number" x-model.number="{{ state }}.temperature" step="0.1" min="0" max="2" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                        </div>
+                                    </div>
+
+                                    <!-- Row 3: Top P / Top K / Min P -->
+                                    <div class="grid grid-cols-1 sm:grid-cols-3 gap-4">
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.top_p') }}</label>
+                                            <input type="number" x-model.number="{{ state }}.top_p" step="0.05" min="0" max="1" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                        </div>
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.top_k') }}</label>
+                                            <input type="number" x-model.number="{{ state }}.top_k" min="0" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                        </div>
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.min_p') }}</label>
+                                            <input type="number" x-model.number="{{ state }}.min_p" step="0.01" min="0" max="1" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                        </div>
+                                    </div>
+
+                                    <!-- Row 4: Repetition Penalty / Presence Penalty -->
+                                    <div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.repetition_penalty') }}</label>
+                                            <input type="number" x-model.number="{{ state }}.repetition_penalty" step="0.05" min="0" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                        </div>
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.presence_penalty') }}</label>
+                                            <input type="number" x-model.number="{{ state }}.presence_penalty" step="0.05" min="-2" max="2" placeholder="{{ t('modal.model_settings.placeholder_default') }}"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                        </div>
+                                    </div>
+                                </div>
+                            </template>
+
+                            <!-- TTL (all model types) -->
+                            <div class="grid grid-cols-1 sm:grid-cols-2 gap-4">
+                                <div>
+                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.ttl_seconds') }}</label>
+                                    <input type="number" x-model.number="{{ state }}.ttl_seconds"
+                                           :placeholder="ttlPlaceholder"
+                                           :disabled="{{ selected_model }}?.pinned"
+                                           min="0" step="60"
+                                           :class="{{ selected_model }}?.pinned ? 'bg-neutral-100 text-neutral-400 cursor-not-allowed' : ''"
+                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                </div>
+                                {% if mode == 'modal' %}
+                                <div class="flex items-end"
+                                     x-show="!{{ selected_model }}?.model_type || {{ selected_model }}.model_type === 'llm' || {{ selected_model }}.model_type === 'vlm'">
+                                    <button type="button" @click="loadGenerationDefaults"
+                                            :disabled="loadingGenDefaults"
+                                            class="w-full px-4 py-2.5 text-sm font-medium text-neutral-700 bg-neutral-100 hover:bg-neutral-200 rounded-xl border border-neutral-200 transition-all disabled:opacity-50">
+                                        <span x-show="!loadingGenDefaults">{{ t('modal.model_settings.load_defaults') }}</span>
+                                        <span x-show="loadingGenDefaults">{{ t('modal.model_settings.loading_defaults') }}</span>
+                                    </button>
+                                </div>
+                                {% endif %}
+                            </div>
+
+                            <p class="text-xs text-neutral-400 text-center"
+                               x-show="!{{ selected_model }}?.model_type || {{ selected_model }}.model_type === 'llm' || {{ selected_model }}.model_type === 'vlm'">{{ t('modal.model_settings.empty_hint') }}</p>
+                        </div>
+                    </div>
+
+                    <!-- Right: Advanced Settings -->
+                    <div class="w-full lg:w-2/5 lg:pl-6 mt-6 lg:mt-0 pt-6 lg:pt-0 border-t lg:border-t-0 border-neutral-200">
+                        <h3 class="text-xs font-bold uppercase tracking-widest text-neutral-400 mb-5">{{ t('modal.model_settings.advanced_label') }}</h3>
+                        <div class="overflow-y-auto max-h-[28rem] space-y-4 pr-1"
+                             x-show="!{{ selected_model }}?.model_type || {{ selected_model }}.model_type === 'llm' || {{ selected_model }}.model_type === 'vlm'">
+                            <!-- Enable Thinking: null = follow thinking_default; status labels are static text so quotes in a translation cannot break an Alpine expression -->
+                            <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
+                                <div class="flex items-start justify-between gap-3">
+                                    <div class="min-w-0">
+                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.enable_thinking') }}</span>
+                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.enable_thinking_hint') }}</p>
+                                    </div>
+                                    <button type="button" @click="
+                                            const effective = {{ state }}.enable_thinking != null ? {{ state }}.enable_thinking : ({{ state }}.thinking_default || false);
+                                            {{ state }}.enable_thinking = !effective;
+                                        "
+                                            :class="({{ state }}.enable_thinking != null ? {{ state }}.enable_thinking : ({{ state }}.thinking_default || false)) ? 'bg-black' : 'bg-neutral-200'"
+                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                        <span :class="({{ state }}.enable_thinking != null ? {{ state }}.enable_thinking : ({{ state }}.thinking_default || false)) ? 'translate-x-5' : 'translate-x-0'"
+                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                    </button>
+                                </div>
+                                <p x-show="{{ state }}.enable_thinking == null && {{ state }}.thinking_default != null"
+                                   class="text-xs text-neutral-400">{{ t('modal.model_settings.enable_thinking_default') }}</p>
+                                <p x-show="{{ state }}.enable_thinking != null && {{ state }}.thinking_default != null && {{ state }}.enable_thinking !== {{ state }}.thinking_default"
+                                   class="text-xs text-amber-500 cursor-pointer hover:text-amber-600"
+                                   @click="{{ state }}.enable_thinking = null">
+                                    {{ t('modal.model_settings.enable_thinking_reset') }}</p>
+                            </div>
+
+                            <!-- Thinking Budget -->
+                            <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
+                                <div class="flex items-start justify-between gap-3">
+                                    <div class="min-w-0">
+                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.thinking_budget') }}</span>
+                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.thinking_budget_hint') }}</p>
+                                    </div>
+                                    <button type="button" @click="{{ state }}.enableThinkingBudget = !{{ state }}.enableThinkingBudget; if (!{{ state }}.enableThinkingBudget) {{ state }}.thinking_budget_tokens = null;"
+                                            :class="{{ state }}.enableThinkingBudget ? 'bg-black' : 'bg-neutral-200'"
+                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                        <span :class="{{ state }}.enableThinkingBudget ? 'translate-x-5' : 'translate-x-0'"
+                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                    </button>
+                                </div>
+                                <div x-show="{{ state }}.enableThinkingBudget" x-transition class="pt-1">
+                                    <input type="number" x-model.number="{{ state }}.thinking_budget_tokens" placeholder="{{ t('modal.model_settings.thinking_budget_placeholder') }}" min="1" step="100"
+                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                </div>
+                            </div>
+
+                            <!-- Limit Tool Result Tokens -->
+                            <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
+                                <div class="flex items-start justify-between gap-3">
+                                    <div class="min-w-0">
+                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.limit_tool_result') }}</span>
+                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.limit_tool_result_hint') }}</p>
+                                    </div>
+                                    <button type="button" @click="{{ state }}.enableToolResultLimit = !{{ state }}.enableToolResultLimit; if (!{{ state }}.enableToolResultLimit) {{ state }}.max_tool_result_tokens = null;"
+                                            :class="{{ state }}.enableToolResultLimit ? 'bg-black' : 'bg-neutral-200'"
+                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                        <span :class="{{ state }}.enableToolResultLimit ? 'translate-x-5' : 'translate-x-0'"
+                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                    </button>
+                                </div>
+                                <div x-show="{{ state }}.enableToolResultLimit" x-transition class="pt-1">
+                                    <input type="number" x-model.number="{{ state }}.max_tool_result_tokens" placeholder="{{ t('modal.model_settings.limit_tool_placeholder') }}" min="100" step="100"
+                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                </div>
+                            </div>
+
+                            <!-- Force Sampling -->
+                            <div class="p-4 bg-neutral-50 rounded-xl">
+                                <div class="flex items-start justify-between gap-3">
+                                    <div class="min-w-0">
+                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.force_sampling') }}</span>
+                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.force_sampling_hint') }}</p>
+                                    </div>
+                                    <button type="button" @click="{{ state }}.force_sampling = !{{ state }}.force_sampling"
+                                            :class="{{ state }}.force_sampling ? 'bg-black' : 'bg-neutral-200'"
+                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                        <span :class="{{ state }}.force_sampling ? 'translate-x-5' : 'translate-x-0'"
+                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                    </button>
+                                </div>
+                            </div>
+
+                            <!-- Trust Remote Code (security-sensitive) -->
+                            <div class="p-4 bg-red-50 rounded-xl border border-red-100">
+                                <div class="flex items-start justify-between gap-3">
+                                    <div class="min-w-0">
+                                        <span class="text-sm font-medium text-red-700">{{ t('modal.model_settings.trust_remote_code') }}</span>
+                                        <p class="text-xs text-red-600 mt-0.5">{{ t('modal.model_settings.trust_remote_code_hint') }}</p>
+                                    </div>
+                                    <button type="button" @click="{{ state }}.trust_remote_code = !{{ state }}.trust_remote_code"
+                                            :class="{{ state }}.trust_remote_code ? 'bg-red-600' : 'bg-neutral-200'"
+                                            class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-red-500">
+                                        <span :class="{{ state }}.trust_remote_code ? 'translate-x-5' : 'translate-x-0'"
+                                              class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                    </button>
+                                </div>
+                            </div>
+
+                            <!-- Chat Template Kwargs -->
+                            <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
+                                <div class="flex items-center justify-between">
+                                    <div>
+                                        <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.chat_template_kwargs') }}</span>
+                                        <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.chat_template_kwargs_hint') }}</p>
+                                    </div>
+                                    <!-- Add dropdown -->
+                                    <div class="relative" x-data="{ addOpen: false }">
+                                        <button type="button" @click="addOpen = !addOpen"
+                                                class="text-xs text-neutral-500 hover:text-neutral-700 font-medium transition-colors">{{ t('modal.model_settings.add_kwarg') }}</button>
+                                        <div x-show="addOpen" @click.away="addOpen = false" x-transition
+                                             class="absolute right-0 mt-1 w-44 bg-white border border-neutral-200 rounded-lg shadow-lg z-10 py-1">
+                                            <button type="button"
+                                                    x-show="!{{ state }}.ctKwargEntries.some(e => e.type === 'enable_thinking')"
+                                                    @click="{{ state }}.ctKwargEntries.push({type: 'enable_thinking', value: 'true', force: false}); addOpen = false"
+                                                    class="w-full text-left px-3 py-1.5 text-sm text-neutral-700 hover:bg-neutral-50 transition-colors">{{ t('modal.model_settings.kwarg_enable_thinking') }}</button>
+                                            <button type="button"
+                                                    x-show="!{{ state }}.ctKwargEntries.some(e => e.type === 'reasoning_effort')"
+                                                    @click="{{ state }}.ctKwargEntries.push({type: 'reasoning_effort', value: 'low', force: false}); addOpen = false"
+                                                    class="w-full text-left px-3 py-1.5 text-sm text-neutral-700 hover:bg-neutral-50 transition-colors">{{ t('modal.model_settings.kwarg_reasoning_effort') }}</button>
+                                            <button type="button"
+                                                    @click="{{ state }}.ctKwargEntries.push({type: 'custom', key: '', value: '', force: false}); addOpen = false"
+                                                    class="w-full text-left px-3 py-1.5 text-sm text-neutral-700 hover:bg-neutral-50 transition-colors">{{ t('modal.model_settings.kwarg_custom') }}</button>
+                                        </div>
+                                    </div>
+                                </div>
+
+                                <!-- Dynamic entries -->
+                                <template x-for="(entry, idx) in {{ state }}.ctKwargEntries" :key="idx">
+                                    <div class="space-y-2">
+                                        <!-- Row 1: Label + Remove -->
+                                        <div class="flex items-center justify-between">
+                                            <template x-if="entry.type === 'enable_thinking'">
+                                                <span class="text-xs font-bold uppercase tracking-wider text-neutral-500">{{ t('modal.model_settings.kwarg_enable_thinking') }}</span>
+                                            </template>
+                                            <template x-if="entry.type === 'reasoning_effort'">
+                                                <span class="text-xs font-bold uppercase tracking-wider text-neutral-500">{{ t('modal.model_settings.kwarg_reasoning_effort') }}</span>
+                                            </template>
+                                            <template x-if="entry.type === 'custom'">
+                                                <span class="text-xs font-bold uppercase tracking-wider text-neutral-500">{{ t('modal.model_settings.kwarg_custom') }}</span>
+                                            </template>
+                                            <button type="button" @click="{{ state }}.ctKwargEntries.splice(idx, 1)"
+                                                    class="p-1 text-neutral-400 hover:text-red-500 transition-colors">
+                                                <svg xmlns="http://www.w3.org/2000/svg" class="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M18 6 6 18"/><path d="m6 6 12 12"/></svg>
+                                            </button>
+                                        </div>
+                                        <!-- Row 2: Value + Force -->
+                                        <div class="flex gap-2 items-center">
+                                            <template x-if="entry.type === 'enable_thinking'">
+                                                <select x-model="entry.value"
+                                                        class="flex-1 px-3 py-1.5 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                                    <option value="true">true</option>
+                                                    <option value="false">false</option>
+                                                </select>
+                                            </template>
+                                            <template x-if="entry.type === 'reasoning_effort'">
+                                                <select x-model="entry.value"
+                                                        class="flex-1 px-3 py-1.5 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                                    <option value="low">{{ t('modal.model_settings.kwarg_effort_low') }}</option>
+                                                    <option value="medium">{{ t('modal.model_settings.kwarg_effort_medium') }}</option>
+                                                    <option value="high">{{ t('modal.model_settings.kwarg_effort_high') }}</option>
+                                                </select>
+                                            </template>
+                                            <template x-if="entry.type !== 'custom'">
+                                                <label class="flex items-center gap-1 flex-shrink-0" title="{{ t('modal.model_settings.kwarg_force_tooltip') }}">
+                                                    <input type="checkbox" x-model="entry.force"
+                                                           class="w-3.5 h-3.5 rounded border-neutral-300 text-neutral-900 focus:ring-neutral-900">
+                                                    <span class="text-xs text-neutral-400">{{ t('modal.model_settings.kwarg_force') }}</span>
+                                                </label>
+                                            </template>
+                                        </div>
+                                        <!-- Row 2b: Custom key/value + Force (stacked) -->
+                                        <template x-if="entry.type === 'custom'">
+                                            <div class="space-y-2">
+                                                <input type="text" x-model="entry.key" placeholder="{{ t('modal.model_settings.kwarg_custom_key_placeholder') }}"
+                                                       class="w-full px-3 py-1.5 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                                <div class="flex gap-2 items-center">
+                                                    <input type="text" x-model="entry.value" placeholder="{{ t('modal.model_settings.kwarg_custom_value_placeholder') }}"
+                                                           class="flex-1 px-3 py-1.5 border border-neutral-200 rounded-lg text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all min-w-0">
+                                                    <label class="flex items-center gap-1 flex-shrink-0" title="{{ t('modal.model_settings.kwarg_force_tooltip') }}">
+                                                        <input type="checkbox" x-model="entry.force"
+                                                               class="w-3.5 h-3.5 rounded border-neutral-300 text-neutral-900 focus:ring-neutral-900">
+                                                        <span class="text-xs text-neutral-400">{{ t('modal.model_settings.kwarg_force') }}</span>
+                                                    </label>
+                                                </div>
+                                            </div>
+                                        </template>
+                                    </div>
+                                </template>
+
+                                <!-- Empty state -->
+                                <p x-show="{{ state }}.ctKwargEntries.length === 0" class="text-xs text-neutral-400 text-center py-1">{{ t('modal.model_settings.no_kwargs') }}</p>
+                            </div>
+
+                            <!-- Experimental Features -->
+                            <h4 class="text-xs font-bold uppercase tracking-widest text-neutral-400 mb-3">{{ t('modal.model_settings.experimental_label') }}</h4>
+
+                                <!-- TurboQuant KV Cache: toggle is locked while is_paroquant, mtp_enabled or vlm_mtp_enabled is set; tooltip text is JSON-encoded so quotes in a translation stay inside the JS string -->
+                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3 mb-3">
+                                    <div class="flex items-start justify-between gap-3">
+                                        <div class="min-w-0">
+                                            <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.turboquant_kv') }}</span>
+                                            <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.turboquant_kv_hint') }}</p>
+                                        </div>
+                                        <button type="button"
+                                                @click="if (!{{ state }}.is_paroquant && !{{ state }}.mtp_enabled && !{{ state }}.vlm_mtp_enabled) {{ state }}.turboquant_kv_enabled = !{{ state }}.turboquant_kv_enabled"
+                                                :disabled="{{ state }}.is_paroquant || {{ state }}.mtp_enabled || {{ state }}.vlm_mtp_enabled"
+                                                :title="{{ state }}.is_paroquant ? {{ state }}.paroquant_reason : ({{ state }}.mtp_enabled || {{ state }}.vlm_mtp_enabled ? {{ t('modal.model_settings.mtp_conflict_turboquant')|tojson|forceescape }} : '')"
+                                                :class="[
+                                                    {{ state }}.turboquant_kv_enabled ? 'bg-black' : 'bg-neutral-200',
+                                                    ({{ state }}.is_paroquant || {{ state }}.mtp_enabled || {{ state }}.vlm_mtp_enabled) ? 'opacity-40 cursor-not-allowed' : ''
+                                                ]"
+                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                            <span :class="{{ state }}.turboquant_kv_enabled ? 'translate-x-5' : 'translate-x-0'"
+                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                        </button>
+                                    </div>
+                                    <div x-show="{{ state }}.turboquant_kv_enabled" x-transition class="pt-1">
+                                        <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.turboquant_kv_bits_label') }}</label>
+                                        <select x-model="{{ state }}.turboquant_kv_bits"
+                                                class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                            <option value="2">2-bit</option>
+                                            <option value="2.5">2.5-bit</option>
+                                            <option value="3">3-bit</option>
+                                            <option value="3.5">3.5-bit</option>
+                                            <option value="4">4-bit</option>
+                                            <option value="6">6-bit</option>
+                                            <option value="8">8-bit</option>
+                                        </select>
+                                    </div>
+                                </div>
+
+                                <!-- IndexCache (DSA models only) -->
+                                <template x-if="DSA_MODEL_TYPES.has({{ selected_model }}?.config_model_type || '')">
+                                    <div class="p-4 bg-neutral-50 rounded-xl space-y-3 mb-3">
+                                        <div class="flex items-start justify-between gap-3">
+                                            <div class="min-w-0">
+                                                <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.index_cache') }}</span>
+                                                <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.index_cache_hint') }} (<a href="https://github.com/THUDM/IndexCache" target="_blank" rel="noopener" class="text-blue-500 hover:text-blue-700 underline">GitHub</a>)</p>
+                                            </div>
+                                            <button type="button"
+                                                    @click="if (!{{ state }}.is_paroquant) { {{ state }}.enableIndexCache = !{{ state }}.enableIndexCache; if (!{{ state }}.enableIndexCache) {{ state }}.index_cache_freq = null; }"
+                                                    :disabled="{{ state }}.is_paroquant"
+                                                    :title="{{ state }}.is_paroquant ? {{ state }}.paroquant_reason : ''"
+                                                    :class="[
+                                                        {{ state }}.enableIndexCache ? 'bg-black' : 'bg-neutral-200',
+                                                        {{ state }}.is_paroquant ? 'opacity-40 cursor-not-allowed' : ''
+                                                    ]"
+                                                    class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                                <span :class="{{ state }}.enableIndexCache ? 'translate-x-5' : 'translate-x-0'"
+                                                      class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                            </button>
+                                        </div>
+                                        <div x-show="{{ state }}.enableIndexCache" x-transition class="pt-1">
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">{{ t('modal.model_settings.index_cache_freq') }}</label>
+                                            <input type="number" x-model.number="{{ state }}.index_cache_freq" min="2" max="16" step="1" placeholder="4"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                        </div>
+                                    </div>
+                                </template>
+
+                                <!-- SpecPrefill: toggle is locked while is_paroquant or vlm_mtp_enabled is set; tooltip text is JSON-encoded so quotes in a translation stay inside the JS string. NOTE(review): labels in this card are hard-coded English, unlike the t() calls elsewhere -->
+                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
+                                    <div class="flex items-start justify-between gap-3">
+                                        <div class="min-w-0">
+                                            <span class="text-sm font-medium text-neutral-700">SpecPrefill</span>
+                                            <p class="text-xs text-neutral-500 mt-0.5">Attention-based sparse prefill for MoE/hybrid models. (<a href="https://arxiv.org/abs/2502.02789" target="_blank" rel="noopener" class="text-blue-500 hover:text-blue-700 underline">Paper</a>) (<a href="https://huggingface.co/Thump604/specprefill-paper" target="_blank" rel="noopener" class="text-blue-500 hover:text-blue-700 underline">HuggingFace</a>)</p>
+                                        </div>
+                                        <button type="button"
+                                                @click="if (!{{ state }}.is_paroquant && !{{ state }}.vlm_mtp_enabled) {{ state }}.specprefill_enabled = !{{ state }}.specprefill_enabled"
+                                                :disabled="{{ state }}.is_paroquant || {{ state }}.vlm_mtp_enabled"
+                                                :title="{{ state }}.is_paroquant ? {{ state }}.paroquant_reason : ({{ state }}.vlm_mtp_enabled ? {{ t('modal.model_settings.vlm_mtp_conflict')|tojson|forceescape }} : '')"
+                                                :class="[
+                                                    {{ state }}.specprefill_enabled ? 'bg-black' : 'bg-neutral-200',
+                                                    ({{ state }}.is_paroquant || {{ state }}.vlm_mtp_enabled) ? 'opacity-40 cursor-not-allowed' : ''
+                                                ]"
+                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                            <span :class="{{ state }}.specprefill_enabled ? 'translate-x-5' : 'translate-x-0'"
+                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                        </button>
+                                    </div>
+                                    <div x-show="{{ state }}.specprefill_enabled" x-transition class="space-y-3 pt-1">
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Draft Model</label>
+                                            <select x-model="{{ state }}.specprefill_draft_model"
+                                                    class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                                <option value="">Select draft model...</option>
+                                                <template x-for="m in models.filter(m => m.id !== {{ selected_model }}?.id)" :key="m.id">
+                                                    <option :value="m.model_path || m.id" x-text="m.id" :selected="{{ state }}.specprefill_draft_model === (m.model_path || m.id)"></option>
+                                                </template>
+                                            </select>
+                                            <p class="text-xs text-neutral-400 mt-1">Small model sharing tokenizer with target (e.g. Qwen3.5-0.8B for 35B)</p>
+                                        </div>
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Keep Rate</label>
+                                            <select x-model="{{ state }}.specprefill_keep_pct"
+                                                    class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                                <option value="0.1">10% — Aggressive (~5-7x, some quality loss)</option>
+                                                <option value="0.2">20% — Balanced (~3x, recommended)</option>
+                                                <option value="0.25">25% — Conservative+ (~2.5x)</option>
+                                                <option value="0.3">30% — Conservative (~2.2x)</option>
+                                                <option value="0.4">40% — Mild (~1.8x)</option>
+                                                <option value="0.5">50% — Minimal (~1.5x)</option>
+                                            </select>
+                                        </div>
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Threshold (tokens)</label>
+                                            <input type="number" x-model.number="{{ state }}.specprefill_threshold" min="1024" max="131072" step="1024" placeholder="8192"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                            <p class="text-xs text-neutral-400 mt-1">Min tokens to trigger (shorter prompts use full prefill)</p>
+                                        </div>
+                                    </div>
+                                </div>
+
+                                <!-- DFlash -->
+                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3">
+                                    <div class="flex items-start justify-between gap-3">
+                                        <div class="min-w-0">
+                                            <span class="text-sm font-medium text-neutral-700">DFlash</span>
+                                            <p class="text-xs text-neutral-500 mt-0.5">Block diffusion speculative decoding for 3-4x faster generation. Supports Qwen (3, 3.5, 3.6) and Gemma4 model families. Requires a DFlash draft model checkpoint.<br><strong>Single-stream only: requests run one at a time.</strong><br>* MLX impl by bstnxbt (<a href="https://github.com/bstnxbt/dflash-mlx" target="_blank" rel="noopener" class="text-blue-500 hover:text-blue-700 underline">GitHub</a>)</p>
+                                            <p x-show="!{{ state }}.dflash_compatible && {{ state }}.dflash_compatibility_reason"
+                                               class="text-xs text-amber-600 mt-1"
+                                               x-text="{{ state }}.dflash_compatibility_reason"></p>
+                                        </div>
+                                        <button type="button"
+                                                @click="if ({{ state }}.dflash_compatible && !{{ state }}.mtp_enabled && !{{ state }}.vlm_mtp_enabled) {{ state }}.dflash_enabled = !{{ state }}.dflash_enabled"
+                                                :disabled="!{{ state }}.dflash_compatible || {{ state }}.mtp_enabled || {{ state }}.vlm_mtp_enabled"
+                                                :title="{{ state }}.dflash_compatible ? ({{ state }}.mtp_enabled || {{ state }}.vlm_mtp_enabled ? '{{ t('modal.model_settings.mtp_conflict') }}' : '') : ({{ state }}.dflash_compatibility_reason || 'Unsupported model')"
+                                                :class="[
+                                                    {{ state }}.dflash_enabled ? 'bg-black' : 'bg-neutral-200',
+                                                    (!{{ state }}.dflash_compatible || {{ state }}.mtp_enabled || {{ state }}.vlm_mtp_enabled) ? 'opacity-40 cursor-not-allowed' : ''
+                                                ]"
+                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                            <span :class="{{ state }}.dflash_enabled ? 'translate-x-5' : 'translate-x-0'"
+                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                        </button>
+                                    </div>
+                                    <div x-show="{{ state }}.dflash_enabled" x-transition class="space-y-3 pt-1">
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Draft Model</label>
+                                            <select x-model="{{ state }}.dflash_draft_model"
+                                                    class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                                <option value="">Select draft model...</option>
+                                                <template x-for="m in models.filter(m => m.id !== {{ selected_model }}?.id)" :key="m.id">
+                                                    <option :value="m.model_path || m.id" x-text="m.id" :selected="{{ state }}.dflash_draft_model === (m.model_path || m.id)"></option>
+                                                </template>
+                                            </select>
+                                            <p class="text-xs text-neutral-400 mt-1">DFlash draft checkpoint (e.g. z-lab/Qwen3-4B-DFlash-b16, z-lab/gemma-4-26B-A4B-it-DFlash). Note: -DFlash suffix only; -assistant variants are for MTP.</p>
+                                        </div>
+                                        <div class="bg-neutral-50 rounded-xl">
+                                            <div class="flex items-start justify-between gap-3">
+                                                <div class="min-w-0">
+                                                    <span class="text-sm font-medium text-neutral-700">Quantization</span>
+                                                    <p class="text-xs text-neutral-500 mt-0.5">Enable quantization for the draft model (weight, activation bits &amp; group size).</p>
+                                                </div>
+                                                <button type="button" @click="{{ state }}.dflash_draft_quant_enabled = !{{ state }}.dflash_draft_quant_enabled"
+                                                        :class="{{ state }}.dflash_draft_quant_enabled ? 'bg-black' : 'bg-neutral-200'"
+                                                        class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                                    <span :class="{{ state }}.dflash_draft_quant_enabled ? 'translate-x-5' : 'translate-x-0'"
+                                                          class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                                </button>
+                                            </div>
+                                        </div>
+                                        <div class="grid grid-cols-3 gap-3 pl-4" x-show="{{ state }}.dflash_draft_quant_enabled" x-transition>
+                                            <div>
+                                                <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Weight Bits</label>
+                                                <select x-model="{{ state }}.dflash_draft_quant_weight_bits"
+                                                        class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                                    <option value="2">2-bit</option>
+                                                    <option value="4" selected>4-bit</option>
+                                                    <option value="8">8-bit</option>
+                                                </select>
+                                            </div>
+                                            <div>
+                                                <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Activation Bits</label>
+                                                <select x-model="{{ state }}.dflash_draft_quant_activation_bits"
+                                                        class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                                    <option value="16" selected>16-bit</option>
+                                                    <option value="32">32-bit</option>
+                                                </select>
+                                            </div>
+                                            <div>
+                                                <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Group Size</label>
+                                                <select x-model="{{ state }}.dflash_draft_quant_group_size"
+                                                        class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                                    <option value="32">32</option>
+                                                    <option value="64" selected>64</option>
+                                                    <option value="128">128</option>
+                                                </select>
+                                            </div>
+                                        </div>
+                                        <div>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Max Context (fallback threshold)</label>
+                                            <input type="number" x-model.number="{{ state }}.dflash_max_ctx" min="1" step="1024" placeholder="unlimited"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                            <p class="text-xs text-neutral-400 mt-1">Prompts at or above this token count switch to BatchedEngine. Leave empty for unlimited.</p>
+                                        </div>
+                                        <div class="pt-2 border-t border-neutral-200 space-y-3">
+                                            <p class="text-xs font-bold uppercase tracking-wider text-neutral-500">Long-context tuning</p>
+                                            <div class="grid grid-cols-2 gap-3">
+                                                <div>
+                                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Draft window size</label>
+                                                    <input type="number" x-model.number="{{ state }}.dflash_draft_window_size" min="1" step="128" placeholder="1024"
+                                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                                    <p class="text-xs text-neutral-400 mt-1">Draft model sliding-attention window. Helps stabilise acceptance on long contexts. Leave empty for dflash default (1024).</p>
+                                                </div>
+                                                <div>
+                                                    <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Draft sink size</label>
+                                                    <input type="number" x-model.number="{{ state }}.dflash_draft_sink_size" min="0" step="8" placeholder="64"
+                                                           class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                                    <p class="text-xs text-neutral-400 mt-1">Attention-sink tokens always kept regardless of window. Leave empty for dflash default (64).</p>
+                                                </div>
+                                            </div>
+                                            <div>
+                                                <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Verify mode</label>
+                                                <select x-model="{{ state }}.dflash_verify_mode"
+                                                        class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
+                                                    <option value="adaptive">adaptive (default)</option>
+                                                    <option value="dflash">dflash</option>
+                                                    <option value="ddtree">ddtree</option>
+                                                    <option value="off">off</option>
+                                                </select>
+                                                <p class="text-xs text-neutral-400 mt-1">Verifier algorithm. "adaptive" shrinks block size when acceptance drops; "off" disables speculative verify.</p>
+                                            </div>
+                                        </div>
+                                        <div class="flex items-start justify-between gap-3 pt-2 border-t border-neutral-200">
+                                            <div class="min-w-0">
+                                                <span class="text-sm font-medium text-neutral-700">In-memory cache</span>
+                                                <p class="text-xs text-neutral-500 mt-0.5">DFlash L1 prefix snapshot cache in RAM. Speeds up multi-turn chats with shared prefixes.</p>
+                                            </div>
+                                            <button type="button" @click="{{ state }}.dflash_in_memory_cache = !{{ state }}.dflash_in_memory_cache; if (!{{ state }}.dflash_in_memory_cache) {{ state }}.dflash_ssd_cache = false"
+                                                    :class="{{ state }}.dflash_in_memory_cache ? 'bg-black' : 'bg-neutral-200'"
+                                                    class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                                <span :class="{{ state }}.dflash_in_memory_cache ? 'translate-x-5' : 'translate-x-0'"
+                                                      class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                            </button>
+                                        </div>
+                                        <div x-show="{{ state }}.dflash_in_memory_cache" x-transition>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">In-memory cache max entries</label>
+                                            <input type="number" x-model.number="{{ state }}.dflash_in_memory_cache_max_entries" min="1" max="128" step="1" placeholder="4"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                            <p class="text-xs text-neutral-400 mt-1">Maximum number of prefix snapshots kept in L1 cache. Each entry stores KV + draft GDN state for one conversation prefix.</p>
+                                        </div>
+                                        <div x-show="{{ state }}.dflash_in_memory_cache" x-transition>
+                                            <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">In-memory cache size (GiB)</label>
+                                            <input type="number" x-model.number="{{ state }}.dflash_in_memory_cache_max_gib" min="1" max="256" step="1" placeholder="8"
+                                                   class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all">
+                                            <p class="text-xs text-neutral-400 mt-1">Byte budget for L1 snapshots; LRU evicts when exceeded.</p>
+                                        </div>
+                                        <div class="flex items-start justify-between gap-3">
+                                            <div class="min-w-0">
+                                                <span class="text-sm font-medium text-neutral-700">SSD cache</span>
+                                                <p class="text-xs text-neutral-500 mt-0.5">L2 spill of evicted L1 entries to disk. Uses the oMLX paged SSD cache directory (<code>dflash_l2/</code>).</p>
+                                                <p x-show="!{{ state }}.dflash_ssd_cache_available" class="text-xs text-amber-600 mt-1">Enable oMLX paged SSD cache first (<code>--paged-ssd-cache-dir</code>).</p>
+                                                <p x-show="{{ state }}.dflash_ssd_cache_available && !{{ state }}.dflash_in_memory_cache" class="text-xs text-amber-600 mt-1">Requires in-memory cache to be enabled.</p>
+                                            </div>
+                                            <button type="button"
+                                                    @click="if ({{ state }}.dflash_ssd_cache_available && {{ state }}.dflash_in_memory_cache) {{ state }}.dflash_ssd_cache = !{{ state }}.dflash_ssd_cache"
+                                                    :disabled="!{{ state }}.dflash_ssd_cache_available || !{{ state }}.dflash_in_memory_cache"
+                                                    :class="[
+                                                        {{ state }}.dflash_ssd_cache ? 'bg-black' : 'bg-neutral-200',
+                                                        (!{{ state }}.dflash_ssd_cache_available || !{{ state }}.dflash_in_memory_cache) ? 'opacity-40 cursor-not-allowed' : ''
+                                                    ]"
+                                                    class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                                <span :class="{{ state }}.dflash_ssd_cache ? 'translate-x-5' : 'translate-x-0'"
+                                                      class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                            </button>
+                                        </div>
+                                    </div>
+                                </div>
+
+                                <!-- Native MTP (mlx-lm PR 990 + PR 15 monkey-patch) -->
+                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3 mt-3">
+                                    <div class="flex items-start justify-between gap-3">
+                                        <div class="min-w-0">
+                                            <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.mtp') }}</span>
+                                            <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.mtp_hint') | safe }}</p>
+                                            <p x-show="!{{ state }}.mtp_compatible && {{ state }}.mtp_compatibility_reason"
+                                               class="text-xs text-amber-600 mt-1"
+                                               x-text="{{ state }}.mtp_compatibility_reason"></p>
+                                            <p x-show="{{ state }}.mtp_compatible && ({{ state }}.dflash_enabled || {{ state }}.turboquant_kv_enabled)"
+                                               class="text-xs text-amber-600 mt-1">{{ t('modal.model_settings.mtp_conflict') }}</p>
+                                        </div>
+                                        <button type="button"
+                                                @click="if (({{ state }}.mtp_compatible || {{ state }}.mtp_enabled) && !{{ state }}.dflash_enabled && !{{ state }}.turboquant_kv_enabled && !{{ state }}.vlm_mtp_enabled) {{ state }}.mtp_enabled = !{{ state }}.mtp_enabled"
+                                                :disabled="(!{{ state }}.mtp_compatible && !{{ state }}.mtp_enabled) || {{ state }}.dflash_enabled || {{ state }}.turboquant_kv_enabled || {{ state }}.vlm_mtp_enabled"
+                                                :title="(!{{ state }}.mtp_compatible && !{{ state }}.mtp_enabled)
+                                                            ? ({{ state }}.mtp_compatibility_reason || 'Unsupported model')
+                                                            : ({{ state }}.dflash_enabled
+                                                                ? '{{ t('modal.model_settings.mtp_conflict_dflash') }}'
+                                                                : ({{ state }}.turboquant_kv_enabled
+                                                                    ? '{{ t('modal.model_settings.mtp_conflict_turboquant') }}'
+                                                                    : ({{ state }}.vlm_mtp_enabled
+                                                                        ? '{{ t('modal.model_settings.vlm_mtp_conflict') }}'
+                                                                        : '')))"
+                                                :class="[
+                                                    {{ state }}.mtp_enabled ? 'bg-black' : 'bg-neutral-200',
+                                                    ((!{{ state }}.mtp_compatible && !{{ state }}.mtp_enabled) || {{ state }}.dflash_enabled || {{ state }}.turboquant_kv_enabled || {{ state }}.vlm_mtp_enabled) ? 'opacity-40 cursor-not-allowed' : ''
+                                                ]"
+                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                            <span :class="{{ state }}.mtp_enabled ? 'translate-x-5' : 'translate-x-0'"
+                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                        </button>
+                                    </div>
+                                </div>
+
+                                <!-- VLM MTP (mlx-vlm 191d7c8+, gemma4_assistant drafter) -->
+                                <div class="p-4 bg-neutral-50 rounded-xl space-y-3 mt-3">
+                                    <div class="flex items-start justify-between gap-3">
+                                        <div class="min-w-0">
+                                            <span class="text-sm font-medium text-neutral-700">{{ t('modal.model_settings.vlm_mtp') }}</span>
+                                            <p class="text-xs text-neutral-500 mt-0.5">{{ t('modal.model_settings.vlm_mtp_hint') | safe }}</p>
+                                            <p x-show="{{ state }}.dflash_enabled || {{ state }}.specprefill_enabled || {{ state }}.mtp_enabled || {{ state }}.turboquant_kv_enabled"
+                                               class="text-xs text-amber-600 mt-1">{{ t('modal.model_settings.vlm_mtp_conflict') }}</p>
+                                        </div>
+                                        <button type="button"
+                                                @click="if (!{{ state }}.is_paroquant && !{{ state }}.dflash_enabled && !{{ state }}.specprefill_enabled && !{{ state }}.mtp_enabled && !{{ state }}.turboquant_kv_enabled) {{ state }}.vlm_mtp_enabled = !{{ state }}.vlm_mtp_enabled"
+                                                :disabled="{{ state }}.is_paroquant || {{ state }}.dflash_enabled || {{ state }}.specprefill_enabled || {{ state }}.mtp_enabled || {{ state }}.turboquant_kv_enabled"
+                                                :title="{{ state }}.is_paroquant ? {{ state }}.paroquant_reason : ''"
+                                                :class="[
+                                                    {{ state }}.vlm_mtp_enabled ? 'bg-black' : 'bg-neutral-200',
+                                                    ({{ state }}.is_paroquant || {{ state }}.dflash_enabled || {{ state }}.specprefill_enabled || {{ state }}.mtp_enabled || {{ state }}.turboquant_kv_enabled) ? 'opacity-40 cursor-not-allowed' : ''
+                                                ]"
+                                                class="relative flex-shrink-0 w-11 h-6 mt-0.5 rounded-full transition-colors duration-300 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-black">
+                                            <span :class="{{ state }}.vlm_mtp_enabled ? 'translate-x-5' : 'translate-x-0'"
+                                                  class="block w-5 h-5 bg-white rounded-full shadow-sm transform transition-transform duration-300 absolute top-0.5 left-0.5"></span>
+                                        </button>
+                                    </div>
+                                    <div x-show="{{ state }}.vlm_mtp_enabled" class="space-y-2">
+                                        <label class="block text-xs font-medium text-neutral-600">{{ t('modal.model_settings.vlm_mtp_draft_model') }}</label>
+                                        <select x-model="{{ state }}.vlm_mtp_draft_model"
+                                                class="w-full px-3 py-2 text-sm border border-neutral-200 rounded-lg focus:outline-none focus:ring-2 focus:ring-black bg-white">
+                                            <option value="">{{ t('modal.model_settings.vlm_mtp_draft_model_placeholder') }}</option>
+                                            <template x-for="m in models.filter(m => /assistant/i.test(m.id))" :key="m.id">
+                                                <option :value="m.id" x-text="m.id"></option>
+                                            </template>
+                                        </select>
+                                        <label class="block text-xs font-medium text-neutral-600 mt-2">{{ t('modal.model_settings.vlm_mtp_block_size') }}</label>
+                                        <input type="number"
+                                               min="1"
+                                               max="16"
+                                               x-model.number="{{ state }}.vlm_mtp_draft_block_size"
+                                               placeholder="4"
+                                               class="w-full px-3 py-2 text-sm border border-neutral-200 rounded-lg focus:outline-none focus:ring-2 focus:ring-black" />
+                                    </div>
+                                </div>
+                        </div>
+                    </div>
+                </div>
+
+{% endmacro %}
diff --git a/omlx/model_settings.py b/omlx/model_settings.py
index 4140fcbc8..d83d97169 100644
--- a/omlx/model_settings.py
+++ b/omlx/model_settings.py
@@ -5,13 +5,14 @@
 flags, and metadata.
 """
 
+import contextlib
 import copy
 import json
 import logging
 import threading
 from dataclasses import dataclass, field, fields
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Iterator, Optional
 
 from .model_profiles import (
     filter_profile_fields,
@@ -266,6 +267,11 @@ def __init__(self, base_path: Path):
         self._settings: Dict[str, ModelSettings] = {}
         self._profiles: Dict[str, Dict[str, Dict[str, Any]]] = {}
         self._templates: Dict[str, Dict[str, Any]] = {}
+        # Ephemeral override layers keyed by model_id. Each value is a list of
+        # (token, override_dict) tuples; later entries win during merge.
+        # Tokens identify which entry to pop on exit so out-of-order context
+        # managers don't corrupt each other's state.
+        self._overrides: Dict[str, list[tuple[object, Dict[str, Any]]]] = {}
 
         # Ensure base directory exists
         self.base_path.mkdir(parents=True, exist_ok=True)
@@ -346,6 +352,11 @@ def _save(self) -> None:
     def get_settings(self, model_id: str) -> ModelSettings:
         """Get settings for a specific model.
 
+        Returns persisted settings merged with any active ephemeral overrides
+        from ``ephemeral_overrides``. Override layers stack (later wins), and
+        a key set to ``None`` in an override is treated as "use the layer
+        beneath this one" so callers can express "don't touch this field".
+
         Args:
             model_id: The model identifier.
 
@@ -354,11 +365,67 @@ def get_settings(self, model_id: str) -> ModelSettings:
         """
         with self._lock:
             if model_id in self._settings:
-                # Return a copy to prevent external modification
-                settings = self._settings[model_id]
-                return ModelSettings.from_dict(settings.to_dict())
+                base = self._settings[model_id].to_dict()
+            else:
+                base = ModelSettings().to_dict()
+
+            for _token, layer in self._overrides.get(model_id, ()):
+                for key, value in layer.items():
+                    if value is None:
+                        continue
+                    base[key] = value
+
+            return ModelSettings.from_dict(base)
+
+    @contextlib.contextmanager
+    def ephemeral_overrides(
+        self, model_id: str, overrides: Optional[Dict[str, Any]]
+    ) -> Iterator[None]:
+        """Apply overrides on top of persisted settings for ``model_id``.
+
+        Inside the ``with`` block, ``get_settings(model_id)`` returns the
+        persisted settings shallow-merged with ``overrides`` (override values
+        win, except ``None`` which defers to the layer beneath). On exit —
+        normal or via exception — the overrides are removed.
+
+        Unknown keys (i.e. not fields of :class:`ModelSettings`) are dropped
+        with a warning. Accepted values are deep-copied on entry, so mutating
+        ``overrides`` afterwards has no effect. Stacking is supported; exits
+        are matched by token, so out-of-order exits are safe.
+
+        Yields ``None``. With no valid overrides the context is a no-op.
+        """
+        if not overrides:
+            yield
+            return
 
-            return ModelSettings()
+        valid_keys = {f.name for f in fields(ModelSettings)}
+        unknown = [k for k in overrides if k not in valid_keys]
+        if unknown:
+            logger.warning(
+                f"ephemeral_overrides: dropping unknown keys for "
+                f"'{model_id}': {unknown}"
+            )
+        # Deep-copy so caller-side mutation can't change the active layer.
+        cleaned = {
+            k: copy.deepcopy(v) for k, v in overrides.items() if k in valid_keys
+        }
+        if not cleaned:
+            yield
+            return
+
+        token = object()
+        with self._lock:
+            self._overrides.setdefault(model_id, []).append((token, cleaned))
+
+        try:
+            yield
+        finally:
+            with self._lock:
+                stack = self._overrides.pop(model_id, ())
+                remaining = [entry for entry in stack if entry[0] is not token]
+                if remaining:
+                    self._overrides[model_id] = remaining
 
     def set_settings(self, model_id: str, settings: ModelSettings) -> None:
         """Set settings for a specific model.
diff --git a/tests/test_accuracy_benchmark.py b/tests/test_accuracy_benchmark.py
index 8159b0121..9ff26f2b4 100644
--- a/tests/test_accuracy_benchmark.py
+++ b/tests/test_accuracy_benchmark.py
@@ -75,6 +75,21 @@ def test_enable_thinking_true(self):
         )
         assert req.enable_thinking is True
 
+    def test_settings_override_defaults_to_none(self):
+        req = AccuracyBenchmarkRequest(
+            model_id="test-model",
+            benchmarks={"mmlu": 100},
+        )
+        assert req.settings_override is None
+
+    def test_settings_override_accepts_dict(self):
+        req = AccuracyBenchmarkRequest(
+            model_id="test-model",
+            benchmarks={"mmlu": 100},
+            settings_override={"temperature": 0.0, "top_p": 1.0},
+        )
+        assert req.settings_override == {"temperature": 0.0, "top_p": 1.0}
+
 
 class TestQueueAndResults:
     def setup_method(self):
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index bb519e395..8506c8821 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -84,6 +84,29 @@ def test_default_generation_length(self):
         )
         assert req.generation_length == 128
 
+    def test_settings_override_defaults_to_none(self):
+        req = BenchmarkRequest(
+            model_id="test-model",
+            prompt_lengths=[1024],
+        )
+        assert req.settings_override is None
+
+    def test_settings_override_accepts_dict(self):
+        req = BenchmarkRequest(
+            model_id="test-model",
+            prompt_lengths=[1024],
+            settings_override={
+                "temperature": 0.1,
+                "turboquant_kv_enabled": True,
+                "turboquant_kv_bits": 4,
+            },
+        )
+        assert req.settings_override == {
+            "temperature": 0.1,
+            "turboquant_kv_enabled": True,
+            "turboquant_kv_bits": 4,
+        }
+
 
 # =============================================================================
 # Prompt generation tests
diff --git a/tests/test_model_settings.py b/tests/test_model_settings.py
index 661b4483e..f9c0e0713 100644
--- a/tests/test_model_settings.py
+++ b/tests/test_model_settings.py
@@ -469,3 +469,227 @@ def worker(model_id):
                 t.join()
 
             assert len(errors) == 0
+
+
+class TestEphemeralOverrides:
+    """Tests for ModelSettingsManager.ephemeral_overrides context manager.
+
+    The override layer is the foundation of the bench-tab inline settings
+    panel: it lets a benchmark run apply per-run overrides without writing
+    to model_settings.json.
+    """
+
+    def test_noop_when_overrides_empty(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(temperature=0.5))
+
+            with manager.ephemeral_overrides("m", None):
+                assert manager.get_settings("m").temperature == 0.5
+            with manager.ephemeral_overrides("m", {}):
+                assert manager.get_settings("m").temperature == 0.5
+
+    def test_overrides_apply_inside_and_revert_after(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(temperature=0.5, top_p=0.9))
+
+            with manager.ephemeral_overrides("m", {"temperature": 0.1}):
+                s = manager.get_settings("m")
+                assert s.temperature == 0.1
+                # Untouched fields keep persisted values.
+                assert s.top_p == 0.9
+
+            after = manager.get_settings("m")
+            assert after.temperature == 0.5
+            assert after.top_p == 0.9
+
+    def test_persisted_file_unchanged(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(temperature=0.5))
+            settings_path = Path(tmpdir) / "model_settings.json"
+            before = settings_path.read_text()
+
+            with manager.ephemeral_overrides(
+                "m", {"temperature": 0.1, "top_p": 0.7}
+            ):
+                pass
+
+            assert settings_path.read_text() == before
+
+    def test_overrides_apply_to_model_with_no_persisted_settings(self):
+        """Engine-init flag overrides should work even for fresh models."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+
+            with manager.ephemeral_overrides(
+                "fresh", {"turboquant_kv_enabled": True, "turboquant_kv_bits": 4}
+            ):
+                s = manager.get_settings("fresh")
+                assert s.turboquant_kv_enabled is True
+                assert s.turboquant_kv_bits == 4
+
+            # And the manager has no persisted state for this model.
+            assert manager.get_settings("fresh").turboquant_kv_enabled is False
+
+    def test_unknown_keys_dropped(self, caplog):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(temperature=0.5))
+
+            with caplog.at_level("WARNING"):
+                with manager.ephemeral_overrides(
+                    "m", {"temperature": 0.1, "totally_made_up_key": 42}
+                ):
+                    s = manager.get_settings("m")
+                    assert s.temperature == 0.1
+                    assert not hasattr(s, "totally_made_up_key")
+
+            assert any("totally_made_up_key" in r.message for r in caplog.records)
+
+    def test_none_value_defers_to_lower_layer(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(temperature=0.5))
+
+            with manager.ephemeral_overrides("m", {"temperature": None}):
+                # None means "don't override this field" — persisted wins.
+                assert manager.get_settings("m").temperature == 0.5
+
+    def test_nested_overrides_inner_wins_then_outer_restored(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(temperature=0.5))
+
+            with manager.ephemeral_overrides("m", {"temperature": 0.2}):
+                assert manager.get_settings("m").temperature == 0.2
+                with manager.ephemeral_overrides("m", {"temperature": 0.1}):
+                    assert manager.get_settings("m").temperature == 0.1
+                # After inner exits, outer override is back in effect.
+                assert manager.get_settings("m").temperature == 0.2
+            # After outer exits, persisted wins.
+            assert manager.get_settings("m").temperature == 0.5
+
+    def test_out_of_order_exit_uses_token(self):
+        """Two overlapping overrides exited out of LIFO order shouldn't corrupt state."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(temperature=0.5))
+
+            outer = manager.ephemeral_overrides("m", {"temperature": 0.2})
+            inner = manager.ephemeral_overrides("m", {"temperature": 0.1})
+            outer.__enter__()
+            inner.__enter__()
+            assert manager.get_settings("m").temperature == 0.1
+
+            # Exit outer first (out of LIFO order).
+            outer.__exit__(None, None, None)
+            # Inner is still active, so its temperature wins.
+            assert manager.get_settings("m").temperature == 0.1
+
+            inner.__exit__(None, None, None)
+            assert manager.get_settings("m").temperature == 0.5
+
+    def test_overrides_released_on_exception(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(temperature=0.5))
+
+            with pytest.raises(RuntimeError):
+                with manager.ephemeral_overrides("m", {"temperature": 0.1}):
+                    assert manager.get_settings("m").temperature == 0.1
+                    raise RuntimeError("boom")
+
+            assert manager.get_settings("m").temperature == 0.5
+            # Internal stack is empty.
+            assert "m" not in manager._overrides
+
+    def test_overrides_isolated_per_model(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("a", ModelSettings(temperature=0.5))
+            manager.set_settings("b", ModelSettings(temperature=0.7))
+
+            with manager.ephemeral_overrides("a", {"temperature": 0.1}):
+                assert manager.get_settings("a").temperature == 0.1
+                # Overrides for "a" don't leak into "b".
+                assert manager.get_settings("b").temperature == 0.7
+
+    def test_engine_init_flag_overrides(self):
+        """TurboQuant/DFlash overrides should compose just like sampling."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(turboquant_kv_enabled=False))
+
+            with manager.ephemeral_overrides(
+                "m",
+                {
+                    "turboquant_kv_enabled": True,
+                    "turboquant_kv_bits": 3,
+                    "dflash_enabled": True,
+                },
+            ):
+                s = manager.get_settings("m")
+                assert s.turboquant_kv_enabled is True
+                assert s.turboquant_kv_bits == 3
+                assert s.dflash_enabled is True
+
+            after = manager.get_settings("m")
+            assert after.turboquant_kv_enabled is False
+            assert after.dflash_enabled is False
+
+    def test_override_respects_mutual_exclusion_constraints(self):
+        """ModelSettings rejects mtp_enabled=True with dflash_enabled=True.
+
+        get_settings runs the merged dict through ModelSettings.from_dict,
+        which triggers __post_init__ validation. An override that creates
+        an invalid combination should surface as an exception from
+        get_settings — not silently succeed with a corrupted state.
+        """
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            manager.set_settings("m", ModelSettings(mtp_enabled=True))
+
+            # Override turns on dflash_enabled while persisted has
+            # mtp_enabled=True — invalid combo per ModelSettings.__post_init__.
+            with manager.ephemeral_overrides("m", {"dflash_enabled": True}):
+                with pytest.raises(Exception):
+                    manager.get_settings("m")
+
+            # Override is still released after the exception path.
+            assert "m" not in manager._overrides
+
+    def test_thread_safe_concurrent_overrides(self):
+        """Concurrent overrides on different models don't corrupt each other."""
+        import threading
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            manager = ModelSettingsManager(Path(tmpdir))
+            errors: list[Exception] = []
+
+            def worker(model_id: str, target_temp: float) -> None:
+                try:
+                    for _ in range(20):
+                        with manager.ephemeral_overrides(
+                            model_id, {"temperature": target_temp}
+                        ):
+                            assert (
+                                manager.get_settings(model_id).temperature
+                                == target_temp
+                            )
+                except Exception as e:
+                    errors.append(e)
+
+            threads = [
+                threading.Thread(target=worker, args=(f"m{i}", i / 10))
+                for i in range(8)
+            ]
+            for t in threads:
+                t.start()
+            for t in threads:
+                t.join()
+
+            assert errors == []
+            # All override stacks released.
+            assert manager._overrides == {}