From 12ad98989f6ec2bcabd568bf2e7c202da919e34f Mon Sep 17 00:00:00 2001 From: Andrew Szeto Date: Sun, 3 May 2026 19:04:14 -0700 Subject: [PATCH] feat(bench): Add opt-out toggle for community benchmark upload Throughput benchmarks upload results to omlx.ai unconditionally at the end of every run, including a stable per-device hash and the model name (which can be a private/local model id). There is no UI toggle, env var, or request flag to disable it. This commit adds a "Share results with the omlx.ai community" checkbox to the throughput benchmark config card, checked by default so existing user behavior is unchanged. The frontend sends the user's choice as should_upload_results on POST /api/bench/start; the run_benchmark coroutine gates the call to _upload_to_omlx_ai on that flag. When the box is unchecked, the SSE 'done' handler finalizes the run directly instead of entering the "Uploading..." UI state. The hint text spells out exactly what gets uploaded and notes the common reason to uncheck (benchmarking a private model whose name would otherwise be included in the payload). The BenchmarkRequest field defaults to True so any direct API caller keeps current behavior. The accuracy benchmark has no upload path and is unaffected. --- omlx/admin/benchmark.py | 35 ++++++++++++---------- omlx/admin/i18n/en.json | 3 ++ omlx/admin/static/js/dashboard.js | 27 ++++++++++++----- omlx/admin/templates/dashboard/_bench.html | 12 ++++++++ 4 files changed, 54 insertions(+), 23 deletions(-) diff --git a/omlx/admin/benchmark.py b/omlx/admin/benchmark.py index 9a0854c6a..bc8378b91 100644 --- a/omlx/admin/benchmark.py +++ b/omlx/admin/benchmark.py @@ -43,6 +43,7 @@ class BenchmarkRequest(BaseModel): prompt_lengths: list[int] generation_length: int = 128 batch_sizes: list[int] = [] + should_upload_results: bool = True @field_validator("prompt_lengths") @classmethod def validate_prompt_lengths(cls, v: list[int]) -> list[int]: @@ -732,21 +733,25 @@ async def run_benchmark(run: BenchmarkRun, engine_pool: Any) -> None: }, }) - # Upload results to omlx.ai (failures don't affect benchmark status) - try: - await _upload_to_omlx_ai(run, engine_pool) - except Exception as e: - logger.warning(f"Benchmark upload to omlx.ai failed: {e}") - await _send_event(run, { - "type": "upload_done", - "data": { - "owner_hash": None, - "total": 0, - "success": 0, - "failed": 0, - "error": str(e), - }, - }) + # Upload results to omlx.ai (failures don't affect benchmark status). + # Skipped when the user opted out via the dashboard checkbox; the + # frontend's 'done' handler finalizes directly in that case, so no + # additional SSE event is needed here. + if run.request.should_upload_results: + try: + await _upload_to_omlx_ai(run, engine_pool) + except Exception as e: + logger.warning(f"Benchmark upload to omlx.ai failed: {e}") + await _send_event(run, { + "type": "upload_done", + "data": { + "owner_hash": None, + "total": 0, + "success": 0, + "failed": 0, + "error": str(e), + }, + }) except asyncio.CancelledError: run.status = "cancelled" diff --git a/omlx/admin/i18n/en.json b/omlx/admin/i18n/en.json index aa12cc072..237696ac6 100644 --- a/omlx/admin/i18n/en.json +++ b/omlx/admin/i18n/en.json @@ -494,6 +494,9 @@ "bench.config.generation_hint": "Generation length: 128 tokens (fixed)", "bench.config.batch_tests": "Continuous Batching Tests", "bench.config.batch_hint": "Batch tests use pp1024 / tg128", + "bench.config.share_results": "Share results with the omlx.ai community", + "bench.config.share_results_hint": "Uploads chip, RAM, GPU, OS version, model name, quantization, and performance numbers to omlx.ai/api/benchmarks. Includes a stable per-device hash so you can find your submissions at omlx.ai/my/. Uncheck to skip the upload for this run (e.g. when benchmarking a private model).", + "bench.upload.in_progress": "Uploading to community benchmarks...", "bench.config.run_button": "Run Benchmark", "bench.progress.preparing": "Preparing...", "bench.progress.cancel": "Cancel", diff --git a/omlx/admin/static/js/dashboard.js b/omlx/admin/static/js/dashboard.js index e454bc2ee..aa357a06f 100644 --- a/omlx/admin/static/js/dashboard.js +++ b/omlx/admin/static/js/dashboard.js @@ -332,6 +332,7 @@ benchModelId: '', benchPromptLengths: { 1024: true, 4096: true, 8192: false, 16384: false, 32768: false, 65536: false, 131072: false, 200000: false }, benchBatchSizes: { 2: true, 4: true, 8: false }, + benchShouldUploadResults: true, benchRunning: false, benchBenchId: null, benchProgress: null, @@ -2098,6 +2099,7 @@ prompt_lengths: promptLengths, generation_length: 128, batch_sizes: batchSizes, + should_upload_results: this.benchShouldUploadResults, }), }); @@ -2149,15 +2151,24 @@ this.benchBatchResults = [...this.benchBatchResults, data.data]; } } else if (data.type === 'done') { - // Benchmark tests done, uploading starts - this.benchUploading = true; - this.benchProgress = { - phase: 'upload', - message: 'Uploading to community benchmarks...', - current: 0, - total: 0, - }; + // Benchmark tests done. If the user opted in to + // sharing results, the server follows up with + // upload events; otherwise we finalize here. this.loadModels(); + if (this.benchShouldUploadResults) { + this.benchUploading = true; + this.benchProgress = { + phase: 'upload', + message: window.t('bench.upload.in_progress'), + current: 0, + total: 0, + }; + } else { + this.benchRunning = false; + this.benchProgress = null; + es.close(); + this.benchEventSource = null; + } } else if (data.type === 'upload') { this.benchUploadResults = [...this.benchUploadResults, data.data]; } else if (data.type === 'upload_done') { diff --git a/omlx/admin/templates/dashboard/_bench.html b/omlx/admin/templates/dashboard/_bench.html index 2e5bc036a..3dea82971 100644 --- a/omlx/admin/templates/dashboard/_bench.html +++ b/omlx/admin/templates/dashboard/_bench.html @@ -109,6 +109,18 @@

{{ t('bench.headi

{{ t('bench.config.batch_hint') }}

+ +
+ +

{{ t('bench.config.share_results_hint') }}

+
+