diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e709fc6..fd9168ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Tooling + +- **Benchmark report scatter plots now assign a unique color per model.** `_render_pareto`, `_render_precision_recall_chart`, and `_render_token_efficiency_chart` were drawing from `tab20` with `i % 20`, so any run with more than 20 models silently reused colors and made the legend ambiguous. Added `_distinct_colors(n)` that concatenates `tab20 + tab20b + tab20c` (60 categorical colors) and falls back to evenly-spaced `hsv` past that. Benchmark-only change; not shipped in the runtime image, no version bump. + ## [2.4.8] - 2026-05-15 ### Security diff --git a/benchmarks/llm/results/report.md b/benchmarks/llm/results/report.md index fd5c1469..06e4ec0b 100644 --- a/benchmarks/llm/results/report.md +++ b/benchmarks/llm/results/report.md @@ -1097,37 +1097,37 @@ One subsection per episode in the corpus, showing how every model performed on t | Model | Result | FP count | |-------|--------|----------| -| `nvidia/llama-3.3-nemotron-super-49b-v1.5` | PASS | 0 | -| `openai/o4-mini` | PASS | 0 | -| `claude-sonnet-4-6` | PASS | 0 | -| `google/gemini-2.5-flash` | PASS | 0 | -| `meta-llama/llama-3.1-8b-instruct` | PASS | 0 | -| `mistralai/codestral-2508` | PASS | 0 | +| `claude-opus-4-7` | PASS | 0 | +| `mistralai/mistral-large-2512` | PASS | 0 | | `claude-haiku-4-5-20251001` | PASS | 0 | -| `openai/o3` | PASS | 0 | +| `deepseek/deepseek-v3.2` | PASS | 0 | +| `google/gemini-2.5-flash` | PASS | 0 | | `meta-llama/llama-4-scout` | PASS | 0 | -| `mistralai/mistral-medium-3.1` | PASS | 0 | -| `mistralai/mistral-large-2512` | PASS | 0 | -| `qwen/qwen3.5-plus-02-15` | PASS | 0 | -| `mistralai/mistral-7b-instruct-v0.1` | PASS | 0 | -| `x-ai/grok-4.3` | PASS | 0 | | `cohere/command-r-plus-08-2024` | PASS | 0 | -| `claude-opus-4-7` | PASS | 0 | -| `deepseek/deepseek-v3.2` | PASS | 0 | +| `mistralai/codestral-2508` | PASS | 0 | +| `qwen/qwen3.5-plus-02-15` | PASS | 0 | +| `meta-llama/llama-3.1-8b-instruct` | PASS | 0 | +| `openai/o3` | PASS | 0 | | `meta-llama/llama-3.3-70b-instruct` | PASS | 0 | -| `meta-llama/llama-4-maverick` | FAIL | 1 | -| `deepseek/deepseek-v4-flash` | FAIL | 1 | +| `x-ai/grok-4.3` | PASS | 0 | +| `nvidia/llama-3.3-nemotron-super-49b-v1.5` | PASS | 0 | +| `claude-sonnet-4-6` | PASS | 0 | +| `mistralai/mistral-7b-instruct-v0.1` | PASS | 0 | +| `mistralai/mistral-medium-3.1` | PASS | 0 | +| `openai/o4-mini` | PASS | 0 | | `google/gemma-4-31b-it` | FAIL | 1 | -| `moonshotai/kimi-k2.6` | FAIL | 1 | -| `nvidia/nemotron-nano-9b-v2` | FAIL | 1 | | `openai/gpt-5.4` | FAIL | 1 | -| `google/gemini-2.5-pro` | FAIL | 1 | | `deepseek/deepseek-r1` | FAIL | 1 | +| `deepseek/deepseek-v4-flash` | FAIL | 1 | +| `nvidia/nemotron-nano-9b-v2` | FAIL | 1 | +| `moonshotai/kimi-k2.6` | FAIL | 1 | +| `meta-llama/llama-4-maverick` | FAIL | 1 | | `openai/gpt-5.5` | FAIL | 1 | +| `google/gemini-2.5-pro` | FAIL | 1 | | `deepseek/deepseek-r1-distill-llama-70b` | FAIL | 2 | | `cohere/command-a` | FAIL | 3 | -| `openai/gpt-3.5-turbo` | FAIL | 3 | | `microsoft/phi-4` | FAIL | 3 | +| `openai/gpt-3.5-turbo` | FAIL | 3 | | `deepseek/deepseek-r1-0528` | FAIL | 27 | #### `ep-daily-tech-news-show-b576979e1fe8`: Motorola Razr Fold is a Noble Competitor to the Galaxy Z Fold 7 - DTNS 5269 @@ -1167,9 +1167,9 @@ One subsection per episode in the corpus, showing how every model performed on t | `mistralai/mistral-medium-3.1` | 0.162 | 0.049 | | `openai/o4-mini` | 0.147 | 0.202 | | `deepseek/deepseek-r1-distill-llama-70b` | 0.057 | 0.128 | -| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | | `cohere/command-r-plus-08-2024` | 0.000 | 0.000 | | `microsoft/phi-4` | 0.000 | 0.000 | +| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | #### `ep-daily-tech-news-show-c1904b8605f7`: Switch 2 Prices Rise, Forecast Drops - DTNS 5265 @@ -1209,8 +1209,8 @@ One subsection per episode in the corpus, showing how every model performed on t | `openai/o4-mini` | 0.067 | 0.149 | | `microsoft/phi-4` | 0.056 | 0.077 | | `meta-llama/llama-3.1-8b-instruct` | 0.029 | 0.064 | -| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | | `meta-llama/llama-3.3-70b-instruct` | 0.000 | 0.000 | +| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | #### `ep-glt1412515089-373d5ba5007b`: #2496 - Julia Mossbridge @@ -1249,9 +1249,9 @@ One subsection per episode in the corpus, showing how every model performed on t | `mistralai/codestral-2508` | 0.231 | 0.091 | | `meta-llama/llama-3.1-8b-instruct` | 0.183 | 0.109 | | `openai/o4-mini` | 0.080 | 0.179 | -| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | | `cohere/command-r-plus-08-2024` | 0.000 | 0.000 | | `microsoft/phi-4` | 0.000 | 0.000 | +| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | #### `ep-it-s-a-thing-e339179dfad6`: SOUP shots - It's a Thing 418 @@ -1261,38 +1261,38 @@ One subsection per episode in the corpus, showing how every model performed on t | Model | F1 | F1 stdev | |-------|----|----------| -| `qwen/qwen3.5-plus-02-15` | 0.667 | 0.000 | | `claude-opus-4-7` | 0.667 | 0.000 | -| `google/gemini-2.5-pro` | 0.667 | 0.000 | +| `qwen/qwen3.5-plus-02-15` | 0.667 | 0.000 | | `openai/gpt-5.5` | 0.667 | 0.000 | +| `google/gemini-2.5-pro` | 0.667 | 0.000 | | `openai/gpt-5.4` | 0.613 | 0.119 | | `google/gemma-4-31b-it` | 0.467 | 0.274 | | `x-ai/grok-4.3` | 0.433 | 0.253 | | `deepseek/deepseek-r1-0528` | 0.404 | 0.281 | +| `deepseek/deepseek-v3.2` | 0.400 | 0.548 | | `cohere/command-a` | 0.400 | 0.000 | | `meta-llama/llama-3.1-8b-instruct` | 0.400 | 0.548 | -| `deepseek/deepseek-v3.2` | 0.400 | 0.548 | | `deepseek/deepseek-v4-flash` | 0.337 | 0.239 | | `openai/o3` | 0.333 | 0.471 | | `deepseek/deepseek-r1` | 0.313 | 0.301 | | `nvidia/llama-3.3-nemotron-super-49b-v1.5` | 0.233 | 0.325 | | `moonshotai/kimi-k2.6` | 0.200 | 0.274 | -| `meta-llama/llama-4-maverick` | 0.000 | 0.000 | -| `openai/o4-mini` | 0.000 | 0.000 | -| `claude-sonnet-4-6` | 0.000 | 0.000 | -| `google/gemini-2.5-flash` | 0.000 | 0.000 | -| `nvidia/nemotron-nano-9b-v2` | 0.000 | 0.000 | -| `mistralai/codestral-2508` | 0.000 | 0.000 | +| `mistralai/mistral-large-2512` | 0.000 | 0.000 | | `claude-haiku-4-5-20251001` | 0.000 | 0.000 | +| `google/gemini-2.5-flash` | 0.000 | 0.000 | | `meta-llama/llama-4-scout` | 0.000 | 0.000 | -| `mistralai/mistral-medium-3.1` | 0.000 | 0.000 | -| `mistralai/mistral-large-2512` | 0.000 | 0.000 | +| `cohere/command-r-plus-08-2024` | 0.000 | 0.000 | +| `mistralai/codestral-2508` | 0.000 | 0.000 | +| `meta-llama/llama-3.3-70b-instruct` | 0.000 | 0.000 | +| `nvidia/nemotron-nano-9b-v2` | 0.000 | 0.000 | +| `claude-sonnet-4-6` | 0.000 | 0.000 | +| `microsoft/phi-4` | 0.000 | 0.000 | +| `meta-llama/llama-4-maverick` | 0.000 | 0.000 | | `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | | `openai/gpt-3.5-turbo` | 0.000 | 0.000 | -| `cohere/command-r-plus-08-2024` | 0.000 | 0.000 | | `deepseek/deepseek-r1-distill-llama-70b` | 0.000 | 0.000 | -| `microsoft/phi-4` | 0.000 | 0.000 | -| `meta-llama/llama-3.3-70b-instruct` | 0.000 | 0.000 | +| `mistralai/mistral-medium-3.1` | 0.000 | 0.000 | +| `openai/o4-mini` | 0.000 | 0.000 | #### `ep-on-air-with-dan-and-alex2-574e4f303730`: Ryanair Wants Alcohol Bans, Emirates' $6.8B Record Profit & Buying Spirit Airlines?! @@ -1319,9 +1319,9 @@ One subsection per episode in the corpus, showing how every model performed on t | `mistralai/mistral-medium-3.1` | 0.500 | 0.000 | | `x-ai/grok-4.3` | 0.472 | 0.066 | | `mistralai/codestral-2508` | 0.469 | 0.045 | -| `claude-sonnet-4-6` | 0.444 | 0.000 | -| `google/gemini-2.5-flash` | 0.444 | 0.000 | | `mistralai/mistral-large-2512` | 0.444 | 0.000 | +| `google/gemini-2.5-flash` | 0.444 | 0.000 | +| `claude-sonnet-4-6` | 0.444 | 0.000 | | `nvidia/llama-3.3-nemotron-super-49b-v1.5` | 0.431 | 0.109 | | `openai/gpt-3.5-turbo` | 0.400 | 0.000 | | `nvidia/nemotron-nano-9b-v2` | 0.361 | 0.091 | @@ -1329,11 +1329,11 @@ One subsection per episode in the corpus, showing how every model performed on t | `deepseek/deepseek-v3.2` | 0.300 | 0.274 | | `meta-llama/llama-3.1-8b-instruct` | 0.284 | 0.166 | | `microsoft/phi-4` | 0.213 | 0.307 | -| `openai/o4-mini` | 0.133 | 0.298 | | `meta-llama/llama-3.3-70b-instruct` | 0.133 | 0.298 | +| `openai/o4-mini` | 0.133 | 0.298 | | `meta-llama/llama-4-scout` | 0.094 | 0.130 | -| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | | `cohere/command-r-plus-08-2024` | 0.000 | 0.000 | +| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | #### `ep-oxide-and-friends-ce789ff5b62e`: Mechanical Engineering at Oxide [chapter images] @@ -1343,32 +1343,32 @@ One subsection per episode in the corpus, showing how every model performed on t | Model | Result | FP count | |-------|--------|----------| -| `nvidia/llama-3.3-nemotron-super-49b-v1.5` | PASS | 0 | -| `meta-llama/llama-4-maverick` | PASS | 0 | -| `openai/o4-mini` | PASS | 0 | -| `deepseek/deepseek-v4-flash` | PASS | 0 | +| `claude-opus-4-7` | PASS | 0 | +| `mistralai/mistral-large-2512` | PASS | 0 | +| `claude-haiku-4-5-20251001` | PASS | 0 | +| `google/gemini-2.5-flash` | PASS | 0 | +| `meta-llama/llama-4-scout` | PASS | 0 | | `cohere/command-a` | PASS | 0 | -| `claude-sonnet-4-6` | PASS | 0 | +| `cohere/command-r-plus-08-2024` | PASS | 0 | +| `mistralai/codestral-2508` | PASS | 0 | | `google/gemma-4-31b-it` | PASS | 0 | -| `google/gemini-2.5-flash` | PASS | 0 | -| `nvidia/nemotron-nano-9b-v2` | PASS | 0 | +| `qwen/qwen3.5-plus-02-15` | PASS | 0 | | `meta-llama/llama-3.1-8b-instruct` | PASS | 0 | -| `mistralai/codestral-2508` | PASS | 0 | -| `claude-haiku-4-5-20251001` | PASS | 0 | | `openai/o3` | PASS | 0 | -| `meta-llama/llama-4-scout` | PASS | 0 | -| `mistralai/mistral-medium-3.1` | PASS | 0 | -| `mistralai/mistral-large-2512` | PASS | 0 | -| `qwen/qwen3.5-plus-02-15` | PASS | 0 | -| `mistralai/mistral-7b-instruct-v0.1` | PASS | 0 | -| `x-ai/grok-4.3` | PASS | 0 | -| `cohere/command-r-plus-08-2024` | PASS | 0 | -| `claude-opus-4-7` | PASS | 0 | +| `deepseek/deepseek-v4-flash` | PASS | 0 | | `meta-llama/llama-3.3-70b-instruct` | PASS | 0 | +| `nvidia/nemotron-nano-9b-v2` | PASS | 0 | +| `x-ai/grok-4.3` | PASS | 0 | +| `nvidia/llama-3.3-nemotron-super-49b-v1.5` | PASS | 0 | +| `claude-sonnet-4-6` | PASS | 0 | +| `meta-llama/llama-4-maverick` | PASS | 0 | +| `mistralai/mistral-7b-instruct-v0.1` | PASS | 0 | | `openai/gpt-5.5` | PASS | 0 | +| `mistralai/mistral-medium-3.1` | PASS | 0 | +| `openai/o4-mini` | PASS | 0 | | `openai/gpt-5.4` | FAIL | 1 | -| `google/gemini-2.5-pro` | FAIL | 1 | | `deepseek/deepseek-r1` | FAIL | 1 | +| `google/gemini-2.5-pro` | FAIL | 1 | | `deepseek/deepseek-v3.2` | FAIL | 2 | | `moonshotai/kimi-k2.6` | FAIL | 4 | | `openai/gpt-3.5-turbo` | FAIL | 10 | @@ -1395,8 +1395,8 @@ One subsection per episode in the corpus, showing how every model performed on t | `claude-sonnet-4-6` | 0.516 | 0.014 | | `meta-llama/llama-3.3-70b-instruct` | 0.512 | 0.035 | | `openai/gpt-5.5` | 0.505 | 0.047 | -| `meta-llama/llama-4-maverick` | 0.496 | 0.021 | | `google/gemma-4-31b-it` | 0.496 | 0.021 | +| `meta-llama/llama-4-maverick` | 0.496 | 0.021 | | `openai/gpt-5.4` | 0.495 | 0.022 | | `x-ai/grok-4.3` | 0.486 | 0.013 | | `qwen/qwen3.5-plus-02-15` | 0.476 | 0.000 | @@ -1454,9 +1454,9 @@ One subsection per episode in the corpus, showing how every model performed on t | `google/gemini-2.5-flash` | 0.125 | 0.000 | | `deepseek/deepseek-v3.2` | 0.100 | 0.224 | | `microsoft/phi-4` | 0.067 | 0.092 | +| `cohere/command-r-plus-08-2024` | 0.000 | 0.000 | | `meta-llama/llama-3.1-8b-instruct` | 0.000 | 0.000 | | `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | -| `cohere/command-r-plus-08-2024` | 0.000 | 0.000 | #### `ep-the-tim-dillon-show-f62bd5fa1cfe`: 495 - Hantavirus Cruise & iPad Babies @@ -1486,8 +1486,8 @@ One subsection per episode in the corpus, showing how every model performed on t | `nvidia/llama-3.3-nemotron-super-49b-v1.5` | 0.171 | 0.125 | | `meta-llama/llama-4-maverick` | 0.167 | 0.000 | | `deepseek/deepseek-r1-distill-llama-70b` | 0.163 | 0.159 | -| `google/gemini-2.5-flash` | 0.154 | 0.000 | | `claude-haiku-4-5-20251001` | 0.154 | 0.000 | +| `google/gemini-2.5-flash` | 0.154 | 0.000 | | `openai/gpt-3.5-turbo` | 0.125 | 0.000 | | `microsoft/phi-4` | 0.079 | 0.072 | | `deepseek/deepseek-v3.2` | 0.057 | 0.079 | @@ -1496,8 +1496,8 @@ One subsection per episode in the corpus, showing how every model performed on t | `meta-llama/llama-3.1-8b-instruct` | 0.049 | 0.019 | | `nvidia/nemotron-nano-9b-v2` | 0.044 | 0.061 | | `mistralai/mistral-large-2512` | 0.044 | 0.025 | -| `openai/o4-mini` | 0.000 | 0.000 | | `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | +| `openai/o4-mini` | 0.000 | 0.000 | #### `ep-tosh-show-5f6894439bb6`: My Mom - Emergency Pod @@ -1522,8 +1522,8 @@ One subsection per episode in the corpus, showing how every model performed on t | `mistralai/mistral-medium-3.1` | 0.421 | 0.048 | | `meta-llama/llama-4-maverick` | 0.400 | 0.000 | | `nvidia/llama-3.3-nemotron-super-49b-v1.5` | 0.399 | 0.223 | -| `claude-sonnet-4-6` | 0.375 | 0.000 | | `claude-haiku-4-5-20251001` | 0.375 | 0.000 | +| `claude-sonnet-4-6` | 0.375 | 0.000 | | `mistralai/mistral-large-2512` | 0.353 | 0.000 | | `meta-llama/llama-3.3-70b-instruct` | 0.347 | 0.087 | | `deepseek/deepseek-r1-distill-llama-70b` | 0.340 | 0.152 | @@ -1537,8 +1537,8 @@ One subsection per episode in the corpus, showing how every model performed on t | `google/gemma-4-31b-it` | 0.182 | 0.025 | | `deepseek/deepseek-v3.2` | 0.140 | 0.219 | | `openai/o4-mini` | 0.133 | 0.183 | -| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | | `microsoft/phi-4` | 0.000 | 0.000 | +| `mistralai/mistral-7b-instruct-v0.1` | 0.000 | 0.000 | ### Parser stress test @@ -1547,29 +1547,29 @@ How each model's responses were actually parsed. Columns are extraction methods, | Model | bracket_fallback | json_array_direct | json_object_ads_key | json_object_no_ads | json_object_segments_key | json_object_single_ad | json_object_single_ad_truncated | json_object_window_segments | markdown_code_block | parse_failure | regex_json_array | |---|---|---|---|---|---|---|---|---|---|---|---| -| `meta-llama/llama-4-maverick` | 0 | 0 | 0 | 184 | 0 | 431 | 0 | 0 | 0 | 0 | 0 | -| `cohere/command-a` | 0 | 0 | 0 | 17 | 0 | 598 | 0 | 0 | 0 | 0 | 0 | -| `claude-sonnet-4-6` | 0 | 567 | 0 | 0 | 0 | 0 | 0 | 0 | 33 | 0 | 15 | -| `google/gemma-4-31b-it` | 0 | 0 | 332 | 154 | 0 | 128 | 1 | 0 | 0 | 0 | 0 | -| `google/gemini-2.5-flash` | 0 | 615 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| `mistralai/codestral-2508` | 0 | 615 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| `claude-haiku-4-5-20251001` | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 615 | 0 | 0 | -| `mistralai/mistral-medium-3.1` | 0 | 615 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| `claude-opus-4-7` | 0 | 613 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | `mistralai/mistral-large-2512` | 0 | 615 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| `openai/gpt-3.5-turbo` | 0 | 0 | 0 | 9 | 0 | 606 | 0 | 0 | 0 | 0 | 0 | +| `claude-haiku-4-5-20251001` | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 615 | 0 | 0 | +| `deepseek/deepseek-v3.2` | 0 | 440 | 8 | 0 | 0 | 167 | 0 | 0 | 0 | 0 | 0 | +| `google/gemini-2.5-flash` | 0 | 615 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| `cohere/command-a` | 0 | 0 | 0 | 17 | 0 | 598 | 0 | 0 | 0 | 0 | 0 | | `cohere/command-r-plus-08-2024` | 0 | 0 | 16 | 554 | 0 | 45 | 0 | 0 | 0 | 0 | 0 | -| `claude-opus-4-7` | 0 | 613 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| `mistralai/codestral-2508` | 0 | 615 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| `google/gemma-4-31b-it` | 0 | 0 | 332 | 154 | 0 | 128 | 1 | 0 | 0 | 0 | 0 | | `openai/gpt-5.4` | 0 | 0 | 0 | 211 | 0 | 404 | 0 | 0 | 0 | 0 | 0 | -| `deepseek/deepseek-v3.2` | 0 | 440 | 8 | 0 | 0 | 167 | 0 | 0 | 0 | 0 | 0 | -| `meta-llama/llama-3.1-8b-instruct` | 0 | 275 | 0 | 63 | 0 | 276 | 0 | 0 | 0 | 1 | 0 | +| `claude-sonnet-4-6` | 0 | 567 | 0 | 0 | 0 | 0 | 0 | 0 | 33 | 0 | 15 | +| `meta-llama/llama-4-maverick` | 0 | 0 | 0 | 184 | 0 | 431 | 0 | 0 | 0 | 0 | 0 | +| `openai/gpt-3.5-turbo` | 0 | 0 | 0 | 9 | 0 | 606 | 0 | 0 | 0 | 0 | 0 | +| `mistralai/mistral-medium-3.1` | 0 | 615 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | `meta-llama/llama-4-scout` | 30 | 4 | 461 | 56 | 0 | 58 | 0 | 0 | 0 | 1 | 5 | | `qwen/qwen3.5-plus-02-15` | 0 | 614 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | +| `meta-llama/llama-3.1-8b-instruct` | 0 | 275 | 0 | 63 | 0 | 276 | 0 | 0 | 0 | 1 | 0 | | `x-ai/grok-4.3` | 0 | 614 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | | `openai/gpt-5.5` | 0 | 0 | 0 | 353 | 0 | 261 | 0 | 0 | 0 | 1 | 0 | | `google/gemini-2.5-pro` | 0 | 590 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 22 | | `openai/o3` | 0 | 0 | 20 | 459 | 9 | 123 | 0 | 0 | 0 | 4 | 0 | -| `microsoft/phi-4` | 0 | 291 | 29 | 27 | 19 | 234 | 0 | 2 | 0 | 6 | 7 | | `deepseek/deepseek-r1` | 0 | 539 | 2 | 15 | 5 | 36 | 0 | 0 | 10 | 6 | 2 | +| `microsoft/phi-4` | 0 | 291 | 29 | 27 | 19 | 234 | 0 | 2 | 0 | 6 | 7 | | `deepseek/deepseek-r1-distill-llama-70b` | 0 | 20 | 44 | 58 | 0 | 482 | 2 | 0 | 0 | 8 | 1 | | `deepseek/deepseek-v4-flash` | 0 | 44 | 362 | 4 | 1 | 190 | 0 | 0 | 0 | 14 | 0 | | `nvidia/nemotron-nano-9b-v2` | 0 | 541 | 0 | 0 | 0 | 0 | 11 | 0 | 0 | 52 | 11 | @@ -1821,7 +1821,7 @@ The `initial_prompt` carries a sponsor vocabulary so Whisper produces consistent ## Run Metadata -- Report generated: 2026-05-16T00:20:31Z +- Report generated: 2026-05-16T02:54:07Z - Unique work units (current state, last-write-wins after retries): 19680 - Raw rows in calls.jsonl: 19712 (32 superseded by later retries; kept for audit) - Successful: 19680 diff --git a/benchmarks/llm/results/report_assets/agreement.svg b/benchmarks/llm/results/report_assets/agreement.svg index a9db11f3..e8760be2 100644 --- a/benchmarks/llm/results/report_assets/agreement.svg +++ b/benchmarks/llm/results/report_assets/agreement.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:32.903376 + 2026-05-15T22:54:09.676620 image/svg+xml @@ -43,7 +43,7 @@ L 90.41348 352.913813 L 90.41348 352.913813 L 74.082216 352.913813 z -" clip-path="url(#p7bbacbdfc8)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p60e963e3e1)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> - - + @@ -346,7 +346,7 @@ z - + @@ -375,7 +375,7 @@ z - + @@ -414,7 +414,7 @@ z - + @@ -461,7 +461,7 @@ z - + @@ -495,7 +495,7 @@ z - + @@ -535,7 +535,7 @@ z - + @@ -580,7 +580,7 @@ z - + @@ -605,7 +605,7 @@ z - + @@ -659,7 +659,7 @@ z - + @@ -704,7 +704,7 @@ z - + @@ -718,7 +718,7 @@ z - + @@ -732,7 +732,7 @@ z - + @@ -746,7 +746,7 @@ z - + @@ -760,7 +760,7 @@ z - + @@ -774,7 +774,7 @@ z - + @@ -788,7 +788,7 @@ z - + @@ -802,7 +802,7 @@ z - + @@ -816,7 +816,7 @@ z - + @@ -830,7 +830,7 @@ z - + @@ -844,7 +844,7 @@ z - + @@ -858,7 +858,7 @@ z - + @@ -872,7 +872,7 @@ z - + @@ -886,7 +886,7 @@ z - + @@ -900,7 +900,7 @@ z - + @@ -914,7 +914,7 @@ z - + @@ -928,7 +928,7 @@ z - + @@ -942,7 +942,7 @@ z - + @@ -956,7 +956,7 @@ z - + @@ -970,7 +970,7 @@ z - + @@ -984,7 +984,7 @@ z - + @@ -998,7 +998,7 @@ z - + @@ -1012,7 +1012,7 @@ z - + @@ -1461,16 +1461,16 @@ z +" clip-path="url(#p60e963e3e1)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -1484,11 +1484,11 @@ L -3.5 0 +" clip-path="url(#p60e963e3e1)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1502,11 +1502,11 @@ L 777.143125 313.292446 +" clip-path="url(#p60e963e3e1)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1520,11 +1520,11 @@ L 777.143125 273.67108 +" clip-path="url(#p60e963e3e1)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1538,11 +1538,11 @@ L 777.143125 234.049713 +" clip-path="url(#p60e963e3e1)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1556,11 +1556,11 @@ L 777.143125 194.428347 +" clip-path="url(#p60e963e3e1)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1575,11 +1575,11 @@ L 777.143125 154.80698 +" clip-path="url(#p60e963e3e1)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1594,11 +1594,11 @@ L 777.143125 115.185614 +" clip-path="url(#p60e963e3e1)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1613,11 +1613,11 @@ L 777.143125 75.564247 +" clip-path="url(#p60e963e3e1)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -2743,7 +2743,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/alignment.svg b/benchmarks/llm/results/report_assets/alignment.svg index d3ed985f..c60a3692 100644 --- a/benchmarks/llm/results/report_assets/alignment.svg +++ b/benchmarks/llm/results/report_assets/alignment.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:33.006402 + 2026-05-15T22:54:09.863182 image/svg+xml @@ -43,7 +43,7 @@ L 400.453385 837.433744 L 400.453385 818.184528 L 204.928281 818.184528 z -" clip-path="url(#p2c7899a112)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p6c3e769b45)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -1112,11 +1112,11 @@ z +" clip-path="url(#p6c3e769b45)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1157,11 +1157,11 @@ z +" clip-path="url(#p6c3e769b45)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1197,11 +1197,11 @@ z +" clip-path="url(#p6c3e769b45)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1248,11 +1248,11 @@ z +" clip-path="url(#p6c3e769b45)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1308,11 +1308,11 @@ z +" clip-path="url(#p6c3e769b45)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1344,11 +1344,11 @@ z +" clip-path="url(#p6c3e769b45)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1677,12 +1677,12 @@ z - - + @@ -1838,7 +1838,7 @@ z - + @@ -1965,7 +1965,7 @@ z - + @@ -2006,7 +2006,7 @@ z - + @@ -2086,7 +2086,7 @@ z - + @@ -2179,7 +2179,7 @@ z - + @@ -2216,7 +2216,7 @@ z - + @@ -2256,7 +2256,7 @@ z - + @@ -2284,7 +2284,7 @@ z - + @@ -2318,7 +2318,7 @@ z - + @@ -2359,7 +2359,7 @@ z - + @@ -2385,7 +2385,7 @@ z - + @@ -2421,7 +2421,7 @@ z - + @@ -2447,7 +2447,7 @@ z - + @@ -2491,7 +2491,7 @@ z - + @@ -2530,7 +2530,7 @@ z - + @@ -2562,7 +2562,7 @@ z - + @@ -2632,7 +2632,7 @@ z - + @@ -2670,7 +2670,7 @@ z - + @@ -2715,7 +2715,7 @@ z - + @@ -2751,7 +2751,7 @@ z - + @@ -2784,7 +2784,7 @@ z - + @@ -2805,7 +2805,7 @@ z - + @@ -2842,7 +2842,7 @@ z - + @@ -2875,7 +2875,7 @@ z - + @@ -2902,7 +2902,7 @@ z - + @@ -2931,7 +2931,7 @@ z - + @@ -2971,7 +2971,7 @@ z - + @@ -2997,7 +2997,7 @@ z - + @@ -3032,7 +3032,7 @@ z - + @@ -3084,7 +3084,7 @@ z - + @@ -3147,7 +3147,7 @@ z - + @@ -4586,7 +4586,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/boundary.svg b/benchmarks/llm/results/report_assets/boundary.svg index 445f55fd..5117d650 100644 --- a/benchmarks/llm/results/report_assets/boundary.svg +++ b/benchmarks/llm/results/report_assets/boundary.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:33.336990 + 2026-05-15T22:54:10.363442 image/svg+xml @@ -43,7 +43,7 @@ L 348.122889 809.440099 L 348.122889 789.950135 L 204.928281 789.950135 z -" clip-path="url(#p8eba2f3263)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: #ff7f0e; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p8a767f801f)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -584,11 +584,11 @@ z +" clip-path="url(#p8a767f801f)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -629,11 +629,11 @@ z +" clip-path="url(#p8a767f801f)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -664,11 +664,11 @@ z +" clip-path="url(#p8a767f801f)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -683,11 +683,11 @@ L 517.595772 21.558281 +" clip-path="url(#p8a767f801f)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -728,11 +728,11 @@ z +" clip-path="url(#p8a767f801f)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1172,12 +1172,12 @@ z - - + @@ -1319,7 +1319,7 @@ z - + @@ -1418,7 +1418,7 @@ z - + @@ -1478,7 +1478,7 @@ z - + @@ -1510,7 +1510,7 @@ z - + @@ -1590,7 +1590,7 @@ z - + @@ -1632,7 +1632,7 @@ z - + @@ -1658,7 +1658,7 @@ z - + @@ -1716,7 +1716,7 @@ z - + @@ -1742,7 +1742,7 @@ z - + @@ -1826,7 +1826,7 @@ z - + @@ -1884,7 +1884,7 @@ z - + @@ -1962,7 +1962,7 @@ z - + @@ -1988,7 +1988,7 @@ z - + @@ -2016,7 +2016,7 @@ z - + @@ -2052,7 +2052,7 @@ z - + @@ -2091,7 +2091,7 @@ z - + @@ -2131,7 +2131,7 @@ z - + @@ -2163,7 +2163,7 @@ z - + @@ -2199,7 +2199,7 @@ z - + @@ -2220,7 +2220,7 @@ z - + @@ -2270,7 +2270,7 @@ z - + @@ -2315,7 +2315,7 @@ z - + @@ -2354,7 +2354,7 @@ z - + @@ -2381,7 +2381,7 @@ z - + @@ -2414,7 +2414,7 @@ z - + @@ -2454,7 +2454,7 @@ z - + @@ -2495,7 +2495,7 @@ z - + @@ -2533,7 +2533,7 @@ z - + @@ -2565,7 +2565,7 @@ z - + @@ -2603,7 +2603,7 @@ z - + @@ -3842,7 +3842,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/calibration.svg b/benchmarks/llm/results/report_assets/calibration.svg index 2a51cc07..b28c5625 100644 --- a/benchmarks/llm/results/report_assets/calibration.svg +++ b/benchmarks/llm/results/report_assets/calibration.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:32.520054 + 2026-05-15T22:54:09.055942 image/svg+xml @@ -37,20 +37,20 @@ L 204.928281 31.99625 z " style="fill: #ffffff"/> - + +iVBORw0KGgoAAAANSUhEUgAAAd0AAARtCAYAAAAQz5+8AAATLklEQVR4nO3XMcqtVxmGYbd+HIyniH4xRlELsbdS+DipUqT42zSnEEsnYG2lZwA2DsIByJ7CD3o6ByBoIahLggaCYLazuN9iXdcInmLBvd7bf37x8eNLjPjvL383PWFrf/rRNT1hW69+82p6wr4++en0gq19eXoAAOxCdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIHNMDdnY/r+kJW3taz9MTtvW3n308PWFbX/v9r6YnbM2lCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkDkmB6ws/fO6QV7u5/X9IRtvfronekJ2/r8X59PT9iaSxcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQOS21npMj9jV/bymJwCbef3Fb6cnbM2lCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkDkePHm9fQGYDM//MH0gn39++e/np6wNZcuAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYDIMT0A2M/XT//9KS++8dXpCVvz8gEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIHNMDdvbeOb1gb/9c0wv29fbtF9MTtvX27Z+nJ2zNpQsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBIHJbaz2mR+zqfl7TE2DEq4/emZ6wrQ8+/O70hK25dAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCI3NZaj+kRu7qf1/QEYDMvX04v2JtLFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJA5JgeAEDns8+mF+zNpQsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBIHK8ePN6egMAke98e3rB3ly6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAESO6QHAfr7/vekF+/rLX6cX7M2lCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgcltrPaZH7Op+XtMTgM38+CdfmZ6wNZcuAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILAJFjegAAnT/+4X/TE7bm0gWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEDmmBwDQ+eBb0wv25tIFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBIHJbaz2mR8CE+3lNT4Dcu+9OL9ibSxcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQOSYHrCz+3lNTwA28+mn0wv25tIFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBIHK8ePN6egMAbMGlCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgcltrPaZH7Op+XtMTgM08refpCVtz6QJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQOaYHwJSn9Tw9YVv385qeACNcugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIre11mN6xK7u5zU9AYCQSxcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBEjukBAHRevpxesDeXLgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAyDE9ANjP+9+cXrCvv/9jesHeXLoAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4ARI7pATDlaT1PT9jW/bymJ8AIly4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgMgxPQCm3M9regKwGZcuAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILAJHbWusxPQIm3M9regKwGZcuAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYDIMT1gZ/fzmp4AQMilCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkDkmB4AQOdpPU9P2JpLFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJA5LbWekyPgAn385qeAGzGpQsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBIHJMD9jZ/bymJwCbeVrP0xO25tIFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBICK6ABARXQCIiC4AREQXACKiCwAR0QWAiOgCQER0ASAiugAQEV0AiIguAEREFwAiogsAEdEFgIjoAkBEdAEgIroAEBFdAIiILgBERBcAIqILABHRBYCI6AJARHQBIPJ/3K9wgrYlbAUAAAAASUVORK5CYII=" id="image68e47f7fcb" transform="scale(1 -1) translate(0 -815.76)" x="204.928281" y="-31.41225" width="343.44" height="815.76"/> - - + @@ -118,7 +118,7 @@ z - + @@ -171,7 +171,7 @@ z - + @@ -219,7 +219,7 @@ z - + @@ -240,7 +240,7 @@ z - + @@ -586,12 +586,12 @@ z - - + @@ -726,7 +726,7 @@ z - + @@ -881,7 +881,7 @@ z - + @@ -980,7 +980,7 @@ z - + @@ -1042,7 +1042,7 @@ z - + @@ -1077,7 +1077,7 @@ z - + @@ -1127,7 +1127,7 @@ z - + @@ -1155,7 +1155,7 @@ z - + @@ -1181,7 +1181,7 @@ z - + @@ -1231,7 +1231,7 @@ z - + @@ -1271,7 +1271,7 @@ z - + @@ -1307,7 +1307,7 @@ z - + @@ -1357,7 +1357,7 @@ z - + @@ -1394,7 +1394,7 @@ z - + @@ -1434,7 +1434,7 @@ z - + @@ -1466,7 +1466,7 @@ z - + @@ -1507,7 +1507,7 @@ z - + @@ -1552,7 +1552,7 @@ z - + @@ -1585,7 +1585,7 @@ z - + @@ -1618,7 +1618,7 @@ z - + @@ -1657,7 +1657,7 @@ z - + @@ -1694,7 +1694,7 @@ z - + @@ -1736,7 +1736,7 @@ z - + @@ -1797,7 +1797,7 @@ z - + @@ -1876,7 +1876,7 @@ z - + @@ -1914,7 +1914,7 @@ z - + @@ -1941,7 +1941,7 @@ z - + @@ -1967,7 +1967,7 @@ z - + @@ -2019,7 +2019,7 @@ z - + @@ -2045,7 +2045,7 @@ z - + @@ -2077,7 +2077,7 @@ z - + @@ -5114,18 +5114,18 @@ z " style="fill: #ffffff"/> +iVBORw0KGgoAAAANSUhEUgAAACgAAAMYCAYAAAC5bKVdAAAFG0lEQVR4nO3c0ZHbMBDAUNLeWtJ5ypSYDpQPfAiewasA472lKN8l++/6c5bY5+2A/9EHzkeeKM/7gcBGTOkDGzGlD2zElD5wPvvthGf6T1Af2BZT+sBGTOkDGzFVINXPIKUPbMSUPrARU/rARkzpA2dv95u7/hPUB7bFlD6wEVP6wEZM6QMbMaUPbMSUPrARUwVS/QxS+sBGTOkD+6MKSh/YFlP6wEZM6QMbMaUPnI/8Yaz/BPWBs7+NGNEHzm6LGX1gI6YKpHqSUPrAjhlKH9gWU/rA3uoofWAHNaUP7KCm9IGz5V+xuuvWDwS2xZQ+sGcxpQ9sxJQ+sIOaKpDqxZ3SB/YkofSBPUkofWBbTOkD22JKH9jXb5Q+sIOa0gfOp4Oa0Qe2xZQ+sOsWVSDVMUPpA2d1zDD6wLaY0gfO+rob3XXrBwJntcWMPrArP6UPbIspfWA3akof2I2a0gd2o6b0gW0xVSDVZYHSBzZiSh/Yk4TSB7bFlD5wVn/9xugD+4aV0gf2DSulD+xZTOkDu1FT+sC+YaUKpHqSUPrAniSUPrAtpvSBvdVR+sC+m6H0gX3DSukDO6gpfWBbTOkDGzGlD+ygpgqkOmYofWAjpvSBPUkofWBbTOkDGzGlD+ygpvSBbTGlD2zElD5w9u6gRvSBbTGlD2zEVIFUV35KH9gxQ+kD22JKH9gWU/rARkzpAzuoKX1gW0zpAxsxpQ/soKb0gW0xVSA1a7sb3XXrBwIbMaUP7ElC6QPbYkof2IgpfWAjpvSBPYspfWBbTOkDGzGlD2zElD6wEVMFUt0HKX3g7P19u+GR/hPUB/YkofSBjZjSBzZiSh/YdYvSB7bFlD6wEVP6wEZM6QN7FlMFUh0zlD6wEVP6wEZM6QMbMaUPnOP+J03+T1AfOOfcbzc80n+C+sC5GzGjD5yzGjGiD2yLKX1gW0zpA9tiqkCqKz+lD5y7JwmjD2yLKX1glwVKH9iVn9IHtsWUPnDOut5ueKT/BPWBbTGlD+xGTekDey+mCqQ6Zih9YJcFSh/YizulD2yLKX1gW0zpA9tiSh/YjZrSB/biTukD22JKHzj3OW83PNJ/gvrArltUgVTHDKUPnHs1YkQf2BZT+sAuC5Q+sC2m9IGNmNIHdlBT+sC2mNIH9tJE6QM7qKkCqZ4klD6wEVP6wEZM6QO7LFD6wLaY0gc2Ykof2F+/UfrAtpjSBzZiSh/YiKkCqd7qKH1gxwylD2zElD6wEVP6wLncE/Z/gvrAtpjSB7bFlD5w7kbM6APn6qBm9IFtMaUP7FlMFUh1zFD6wC4LlD5wbvevSfyfoD6wywKlD+xZTOkDexZT+sC2mNIH9iym9IH9KozSB7bFVIFUlwVKH9gxQ+kD55avsf4T1Ae2xZQ+sGcxpQ9siyl9YC/ulD6wLab0gf2ehNIHztXfLDD6wLaYKpCaS/5ap/8E9YEdM5Q+sMsCpQ9siyl9YM9iSh/YFlP6wJ7FlD6wLab0gY2Y0gd23aIKpPpdHaUP7Jih9IFdFih9YFtM6QN7FlP6wLaY0gf2LKb0gY2Y0gf2JTqlD2yLKX1g1y2qQKpjhtIH9t/7UfrAOT1JGH1gI6b0gY2Y0gfO6brF6APbYkof2IgpfWAjpvSBjZgqkOpnkNIHNmJKH9iIKX1gI6b0gX03Q+kD22JKH9iIKX1gI6b0gY2Y0gc2YqpAqp9BSh/YiCl9YC/ulD6wLab0gY2Y0gc2Ykof2IgpfeA/rR5i22l2EbkAAAAASUVORK5CYII=" id="image791c3eb2ee" transform="scale(1 -1) translate(0 -570.24)" x="569.52" y="-154.08" width="28.8" height="570.24"/> - - + @@ -5150,7 +5150,7 @@ z - + @@ -5166,7 +5166,7 @@ z - + @@ -5182,7 +5182,7 @@ z - + @@ -5197,7 +5197,7 @@ z - + @@ -5212,7 +5212,7 @@ z - + @@ -5227,7 +5227,7 @@ z - + @@ -5294,7 +5294,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/compliance.svg b/benchmarks/llm/results/report_assets/compliance.svg index 90da298f..6f7a5cf0 100644 --- a/benchmarks/llm/results/report_assets/compliance.svg +++ b/benchmarks/llm/results/report_assets/compliance.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:32.185157 + 2026-05-15T22:54:08.364026 image/svg+xml @@ -43,7 +43,7 @@ L 250.649418 946.894645 L 250.649418 924.724262 L 226.120313 924.724262 z -" clip-path="url(#p8c270ab384)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#pb396e8c9d9)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -353,11 +353,11 @@ z +" clip-path="url(#pb396e8c9d9)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -399,11 +399,11 @@ z +" clip-path="url(#pb396e8c9d9)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -440,11 +440,11 @@ z +" clip-path="url(#pb396e8c9d9)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -492,11 +492,11 @@ z +" clip-path="url(#pb396e8c9d9)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -553,11 +553,11 @@ z +" clip-path="url(#pb396e8c9d9)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1103,12 +1103,12 @@ z - - + @@ -1150,7 +1150,7 @@ z - + @@ -1240,7 +1240,7 @@ z - + @@ -1288,7 +1288,7 @@ z - + @@ -1378,7 +1378,7 @@ z - + @@ -1457,7 +1457,7 @@ z - + @@ -1489,7 +1489,7 @@ z - + @@ -1517,7 +1517,7 @@ z - + @@ -1601,7 +1601,7 @@ z - + @@ -1651,7 +1651,7 @@ z - + @@ -1690,7 +1690,7 @@ z - + @@ -1751,7 +1751,7 @@ z - + @@ -1777,7 +1777,7 @@ z - + @@ -1813,7 +1813,7 @@ z - + @@ -1846,7 +1846,7 @@ z - + @@ -1873,7 +1873,7 @@ z - + @@ -1917,7 +1917,7 @@ z - + @@ -1943,7 +1943,7 @@ z - + @@ -1980,7 +1980,7 @@ z - + @@ -2018,7 +2018,7 @@ z - + @@ -2052,7 +2052,7 @@ z - + @@ -2073,7 +2073,7 @@ z - + @@ -2105,7 +2105,7 @@ z - + @@ -2134,7 +2134,7 @@ z - + @@ -2167,7 +2167,7 @@ z - + @@ -2208,7 +2208,7 @@ z - + @@ -2235,7 +2235,7 @@ z - + @@ -2314,7 +2314,7 @@ z - + @@ -2356,12 +2356,52 @@ z - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -2388,15 +2428,15 @@ z - - + + - + - + - + @@ -2424,15 +2464,15 @@ z - - + + - + - + - + @@ -2464,51 +2504,11 @@ z - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +" clip-path="url(#pb396e8c9d9)" style="fill: none; stroke-dasharray: 0.8,1.32; stroke-dashoffset: 0; stroke: #808080; stroke-opacity: 0.7; stroke-width: 0.8"/> + diff --git a/benchmarks/llm/results/report_assets/detection_by_length.svg b/benchmarks/llm/results/report_assets/detection_by_length.svg index 6ab91205..7e6cdaf5 100644 --- a/benchmarks/llm/results/report_assets/detection_by_length.svg +++ b/benchmarks/llm/results/report_assets/detection_by_length.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:33.760337 + 2026-05-15T22:54:11.193901 image/svg+xml @@ -37,20 +37,20 @@ L 204.928281 21.558281 z " style="fill: #ffffff"/> - + +iVBORw0KGgoAAAANSUhEUgAAAToAAASTCAYAAADqafHdAAARdklEQVR4nO3WPavedx3H8Vzyl5pQbEiKeNKQisUOUgung41kElqNlg4uTh0FBfMQOrqVbg6CrnZRCDi5eItIgzZx6n2j1Jim1ea0Eg8nN83lM/j/JvnZN6/XI/gs3zffzU8PPbw9BBD2idkDAP7XhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IC85cwTR2ZvYODqm/uzJ7Di+t7sBYz46IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDshbDh8/PHsDI2/uz17Ailu3Zi9gxEcH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABecvsAYzt789ewJqnf/Ll2RMY8NEBeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2Qt/no4rPb2SNYd/u3L8+ewIqDV96fPYEBHx2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkLdvLV2ZvYOCT33xs9gRW/Ocvv5w9gQEfHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+Rtbv3o29vZI1h359392RNYcc83Hpk9gQEfHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXnLr84+OHsDA0/8/ursCazYvn1t9gQGfHRAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2Qt7ly48fb2SNYd+KDg9kTWHH3xZdmT2DARwfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndEDe5qnzz2xnj2DdL+47NnsCfKz56IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBvObe7M3sDI5dvzl7AmpOfnb2AAR8dkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5C1P/vmfszcw8M7ZM7MnsOLEvz6YPYEBHx2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5y5/OPDh7AwO7nzoxewIrtjeuzp7AgI8OyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8pbHb947ewMj1y7PXsCKv548OnsCAz46IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8pZDh4/O3sDAhbtXZk9gxePLo7MnMOCjA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogLzl6T/+bvYGBs7t7syewIrtK27o/52PDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KWc7s7szcw8NJ7H86ewIqvHf3M7AkM+OiAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IW75+6uzsDQy8f/Dz2RNYsXn4q7MnMOCjA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogLzle795YfYGBr54bJk9gRXbl/8wewIDPjogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyln/8++bsDQx8/9HPz57Ais3x07MnMOCjA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogLzl3O7O7A0MXHj377MnsOJLx0/PnsCAjw7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPylh9eemf2BvhY+87OldkTGPDRAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkLc8e/qh2RsYuHDt2uwJrLh05+3ZExjw0QF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndEDe8oMX35q9gYGvPHBk9gRWnLz31OwJDPjogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IG/z2t5z29kjWPf8xVdnT2DFk6eOzJ7AgI8OyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8pbnL746ewMDv/7b3uwJrPjuI5+bPYEBHx2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5m9f2ntvOHsG6n73xxuwJrLh+cHf2BAZ8dECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZC3fOG+x2ZvYGD/9uuzJ7Dixu3t7AkM+OiAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7I2zx1/pnt7BGsu3H7o9kTWHFu9/7ZExjw0QF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndEDe8sCn75m9gYEPD+7MnsCKS+8dzJ7AgI8OyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAvOVbDx2bvYGB829dnz2BFa/v3Zw9gQEfHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+T9F80/oWYhonDcAAAAAElFTkSuQmCC" id="image28550f1a86" transform="scale(1 -1) translate(0 -843.12)" x="204.928281" y="-21.438054" width="226.08" height="843.12"/> - - + @@ -275,7 +275,7 @@ z - + @@ -457,7 +457,7 @@ z - + @@ -568,12 +568,12 @@ z - - + @@ -755,7 +755,7 @@ z - + @@ -788,7 +788,7 @@ z - + @@ -925,7 +925,7 @@ z - + @@ -985,7 +985,7 @@ z - + @@ -1011,7 +1011,7 @@ z - + @@ -1053,7 +1053,7 @@ z - + @@ -1079,7 +1079,7 @@ z - + @@ -1111,7 +1111,7 @@ z - + @@ -1189,7 +1189,7 @@ z - + @@ -1250,7 +1250,7 @@ z - + @@ -1287,7 +1287,7 @@ z - + @@ -1348,7 +1348,7 @@ z - + @@ -1376,7 +1376,7 @@ z - + @@ -1416,7 +1416,7 @@ z - + @@ -1455,7 +1455,7 @@ z - + @@ -1495,7 +1495,7 @@ z - + @@ -1530,7 +1530,7 @@ z - + @@ -1566,7 +1566,7 @@ z - + @@ -1587,7 +1587,7 @@ z - + @@ -1639,7 +1639,7 @@ z - + @@ -1671,7 +1671,7 @@ z - + @@ -1716,7 +1716,7 @@ z - + @@ -1748,7 +1748,7 @@ z - + @@ -1784,7 +1784,7 @@ z - + @@ -1834,7 +1834,7 @@ z - + @@ -1868,7 +1868,7 @@ z - + @@ -1906,7 +1906,7 @@ z - + @@ -1950,7 +1950,7 @@ z - + @@ -1991,7 +1991,7 @@ z - + @@ -2018,7 +2018,7 @@ z - + @@ -2044,7 +2044,7 @@ z - + @@ -4354,18 +4354,18 @@ z " style="fill: #ffffff"/> +iVBORw0KGgoAAAANSUhEUgAAACkAAAMzCAYAAAA7xTuWAAAFUUlEQVR4nO3c0a2dMBAAUZu7taTzlAmkBPIxEow0p4IRi43hvmT/XX/u9XHH2wH/QxE5hyBTkCiJbNwURWTjpigiGzelSMoc++2EZ4orqYhsC6IoIhs3RRHZuCmKyMZNKZLSPUlRRDZuiiKycVMUkbP3978OKK6kIrLVTSmS0j1JUUQ2booisnFTFJGNm6KIbNyUIindkxRFZOOmKCL7axaKIrLVTVFENm5KkZTuSYoicg7BCUNxJRWRs3+NG6GInN3qZigiGzelSEpPHIoisi2IoohsdVMUkb0tUhSRbeaUIik9cSiKyNmC79HfL1ySyFY3RRHZAYOiiGzclCIpPXEoisi+YFAUkT1xKIrINnOKIrLVTSmS0hZEUUT2fZKiiOyJQ1FEztFmziiS0hZEUUR2nqQoIlvdFEXkrFY3QxHZ6qYUSZn1+37n9wuXJHJWWxBDEdnrA0UR2eqmKCJ7faAUSekdh6KI7PWBoohsdVMUkR0wKIrIxk0pktITh6KIbAuiKCJn9feTDEVkn6Mpisg+R1OKpHTAoCgie32gKCL7HE1RRLaZUxSRbeaUIiltQRRFZK+0FEVkH6woisg+R1MUkW3mlCIpbUEURWTjpigie+JQFJGtbooisnFTiqT0xKEoItuCKIrIxk1RRLaZUxSRrW5KkZTuSYoicvbuiYNQRLa6KYrIxk1RRPb6QCmS0hZEUUS2BVEUka1uiiKycVMUkW3mlCIpbUEURWTjpigie+JQFJGtbooictb+fuf3C1eRnO5JiiKyJw5FEdnqpigiGzdFEdm4KUVSOmBQFJFtQRRFZOOmKCIbN0UR2bgpRVI6T1IUkbP37+2GR4orqYjsiUNRRDZuiiKycVOKpHSepCgi24IoisjGTVFENm6KIrJnN6VISlsQRRHZuCmKyMZNUUQ2booicu7v/2M7x5UskjL3fb3d8EhxJRWRczVuhiJy7tW4EYrIVjdFEdnqphRJaQuiKCJ7faAoIufqicNQRLa6KYrInt2UIim941AUkW1BFEXk3Ot8u+GR4koqIlvdFEVkrw+UIil9waAoItuCKIrIDhgURWQfByiKyFY3pUhKWxBFEdkWRFFE9vpAUUT2cYCiiGx1U4qkzHXfbzc8UlxJRWTnSYoistVNUUTOtRo3QhHZ6qYUSemAQVFEtgVRFJGNm6KIbDOnKCJb3ZQiKb3SUhSRPXEoisieOBRFZOOmKCIbN6VISgcMiiKyLYiiiGzcFEVkfz9JUUS2uilFUronKYrIxk1RRPa2SFFEtropisjGTSmS0j1JUUTO+f1pO66kIrLVTVFEtropRVLm6p5kKCLn7InDUES2uimKyA4YFEVkq5tSJKUDBkUROdf3fxBzXElFZAcMiiKyAwZFEdmzm1IkpS2IoojsgEFRRPYDKEUR2eqmKCJ7dlOKpLQFURSRcwn2IMWVVES2uimKyA4YFEVkq5tSJKUvGBRFZFsQRRHZL2IUReSc/TULQxHZ6qYUSZlT8E6ruJKKyLYgiiKyAwZFEdnqpigie3ZTiqS0BVEUkR0wKIrIVjdFEdm4KYrIzpOUIin9AEpRRLYFURSRHTAoishWN0UR2bObUiSlLYiiiOyAQVFENm6KIrJfHyiKyFY3pUhK50mKIrItiKKI7L8bpSgi524zZygiGzelSEr3JEUROXfnSYYistVNUUQ2booisnFTiqR0T1IUkY2boohs3BRFZOOmKCIbN6VISh+sKIrItiCKIrJxUxSRjZuiiGzclCIp3ZMURWTjpigiGzdFEdnHAYoistVNKZLSPUlRRDZuiiKycVMUkf8AStFjEWJ//8EAAAAASUVORK5CYII=" id="image9c615c08fe" transform="scale(1 -1) translate(0 -589.68)" x="444.24" y="-147.6" width="29.52" height="589.68"/> - - + @@ -4380,7 +4380,7 @@ L 3.5 0 - + @@ -4395,7 +4395,7 @@ L 3.5 0 - + @@ -4410,7 +4410,7 @@ L 3.5 0 - + @@ -4425,7 +4425,7 @@ L 3.5 0 - + @@ -4440,7 +4440,7 @@ L 3.5 0 - + @@ -4487,7 +4487,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/detection_by_position.svg b/benchmarks/llm/results/report_assets/detection_by_position.svg index c7496392..21c3c466 100644 --- a/benchmarks/llm/results/report_assets/detection_by_position.svg +++ b/benchmarks/llm/results/report_assets/detection_by_position.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:33.892626 + 2026-05-15T22:54:11.419787 image/svg+xml @@ -37,20 +37,20 @@ L 204.928281 21.558281 z " style="fill: #ffffff"/> - + +iVBORw0KGgoAAAANSUhEUgAAAToAAASTCAYAAADqafHdAAARbklEQVR4nO3Wz4pYZx3H4Ux7YjPjpLEhabQ1bS0YU4WSKm6qxY2ggiKuXAreQKHXICJ00Vtw46p0IRV046bgrlQ6pdDSolVK1DR0ESeTaeafd3DelbzJh+e5gi8c3s/5bfzu1JWTUwBhD8weAPD/JnRAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkLZubsycw8tQTsxewZv+z2QsYcdEBeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2Qt3zrhc/P3sDAX/9ye/YEVmxtzl7AiIsOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8pZP/n579gYGfvLHH8+ewIrf/+APsycw4KID8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogb3n6u5dmb2Dk7sHsBaz45ne2Zk9gwEUH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABecun7386ewMD53b3Zk9gxeVf/3D2BAZcdECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZC3cfTGSyezRzDwxQuzF7DinZ/+dvYEBlx0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5G0cvfHSyewRDGxvzV7AmsPD2QsYcNEBeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RA3jJ7AGMn1743ewIr9l/8zewJDLjogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyFvuvPr27A0M/OfZJ2dPYMVTP/vG7AkMuOiAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogb9n80Vdnb2DgyYe/PnsCa7b/NXsBAy46IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyNs4/ujlk9kj4H72ycVHZ09gwEUH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RA3nL85ruzNzDwj+8/N3sCK75ya2/2BAZcdECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZC3PPD8t2dvYGBzOTN7AmvOfmH2AgZcdECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+Qtp+7uzd7AwJ3Dw9kTWHF09suzJzDgogPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IC8jePjP5/MHsG624e3Zk9gxd6B73Ovc9EBeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RA3vLP3fdmb2Bgw//onnb5YGv2BAa8ICBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KWc5+7MHsDAzs3d2ZPYMXlzSuzJzDgogPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IC8Zf9ob/YGBp45f3X2BFZ88Nm/Z09gwEUH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RA3nLmwa3ZGxjYubkzewIrrj5yZfYEBlx0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkLe8eeOt2RsYePrhx2ZPgPuaiw7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPylifOXpi9gYGPd2/MnsCKvcOPZ09gwEUH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RA3nLzzn9nb2Dg7vHh7Ams2D3Ynz2BARcdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5C2Pb1+avYGB1/+2M3sCK65dvDh7AgMuOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KW2QMY2z+avYA1W8tDsycw4KID8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAvOXVD3Zmb2Dg3EMbsyew4sadW7MnMOCiA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogLzl+S9dmr2BgVfe+mj2BFb8/GunZ09gwEUH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RA3vLah9dnb2Dg2qNnZk9gxfZp3+de56ID8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAPKED8oQOyBM6IE/ogDyhA/KEDsgTOiBP6IA8oQPyhA7IEzogT+iAvGX34GT2Bgaunvc/upddv707ewIDXhCQJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5yy+eeXz2BgZ++ad3Z09gxa9eeGz2BAZcdECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZC3vPbh9dkbGDi/ucyewIrt02dmT2DARQfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndEDesntwMnsDA5vLg7MnwH3NRQfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5QgfkCR2QJ3RAntABeUIH5AkdkCd0QJ7QAXlCB+QJHZAndECe0AF5/wMy7Y2e2VrzIQAAAABJRU5ErkJggg==" id="image5c01c1c8df" transform="scale(1 -1) translate(0 -843.12)" x="204.928281" y="-21.007108" width="226.08" height="843.12"/> - - + @@ -301,7 +301,7 @@ z - + @@ -431,7 +431,7 @@ z - + @@ -525,12 +525,12 @@ z - - + @@ -745,7 +745,7 @@ z - + @@ -814,7 +814,7 @@ z - + @@ -970,7 +970,7 @@ z - + @@ -1030,7 +1030,7 @@ z - + @@ -1056,7 +1056,7 @@ z - + @@ -1098,7 +1098,7 @@ z - + @@ -1124,7 +1124,7 @@ z - + @@ -1156,7 +1156,7 @@ z - + @@ -1234,7 +1234,7 @@ z - + @@ -1295,7 +1295,7 @@ z - + @@ -1332,7 +1332,7 @@ z - + @@ -1393,7 +1393,7 @@ z - + @@ -1421,7 +1421,7 @@ z - + @@ -1461,7 +1461,7 @@ z - + @@ -1500,7 +1500,7 @@ z - + @@ -1540,7 +1540,7 @@ z - + @@ -1575,7 +1575,7 @@ z - + @@ -1611,7 +1611,7 @@ z - + @@ -1632,7 +1632,7 @@ z - + @@ -1684,7 +1684,7 @@ z - + @@ -1716,7 +1716,7 @@ z - + @@ -1761,7 +1761,7 @@ z - + @@ -1793,7 +1793,7 @@ z - + @@ -1829,7 +1829,7 @@ z - + @@ -1879,7 +1879,7 @@ z - + @@ -1913,7 +1913,7 @@ z - + @@ -1951,7 +1951,7 @@ z - + @@ -1995,7 +1995,7 @@ z - + @@ -2036,7 +2036,7 @@ z - + @@ -2063,7 +2063,7 @@ z - + @@ -2089,7 +2089,7 @@ z - + @@ -4420,18 +4420,18 @@ z " style="fill: #ffffff"/> +iVBORw0KGgoAAAANSUhEUgAAACkAAAMzCAYAAAA7xTuWAAAFUUlEQVR4nO3c0a2dMBAAUZu7taTzlAmkBPIxEow0p4IRi43hvmT/XX/u9XHH2wH/QxE5hyBTkCiJbNyUIindkxRFZOOmKCLn2G8nPFNcSUVkq5uiiGzclCIp3ZMURWTjpigiGzdFEdm4KYrIxk0pkjJ7f/8ThuJKKiLbgiiKyMZNUUQ2booisnFTiqR0T1IUkY2boohs3BRFZOOmKCL7axZKkZS2IIoisnFTFJGNm6KInENwwlBcSUXk7F/jRhRJmd0WxFBENm6KIrInDkUR2eqmKCJb3ZQiKb3SUhSRPXEoisieOBRF5GzB9+jvFy5JZKubUiSlAwZFEdm4KYrInjgURWQfByiKyDZzSpGUnjgURWRbEEUR2eqmKCL7PklRRLaZU4qkzNETh6GIbAuiKCI7T1IUka1uiiJyVqubUSSlLYiiiJz1+37n9wuXJHJWq5uhiOz1gaKIbHVTiqT0jkNRRPb6QFFE9vpAUUS2uimKyJ7dlCIp3ZMURWRPHIoistVNUUTO6u8nGYrIPkdTiqT0zZyiiOyAQVFE9vpAUUT2OZqiiGwzpxRJ6YlDUUS2BVEUkb0tUhSRfbCiKCL7HE0pktITh6KIbAuiKCIbN0UR2WZOUUS2uilFUronKYrInjgURWSrm6KIbNwURWSbOaVISlsQRRHZuCmKyNm7Jw5CEdnqpigiGzelSErvOBRFZFsQRRHZ6qYoIlvdFEVk46YUSemJQ1FEtgVRFJGNm6KIbDOnKCJb3ZQiKbP29zu/X7gkkY2boojsiUNRRLa6KYrIxk0pktI9SVFEdsCgKCJb3RRFZOOmKCIbN6VISvckRRHZeZKiiJy9f283PFJcSUVkmzlFEdm4KUVSuicpisjOkxRFZKuboohs3BRFZOOmFEnpgEFRRLYFURSRjZuiiGzcFEVk46YUSZn7+/8i0HElFZFz39fbDY8UV1IROVfjZigi516NG6GIbHVTiqS0BVEUkW1BFEVkrw8UReRcbeYMRWSrm1IkpQMGRRHZ6wNFEdnqpigi517n2w2PFFdSEdnqphRJ6R2HoojsCwZFEdnqpigiO2BQFJF9HKAUSWkLoigi24IoishWN0UR2esDRRHZxwFKkZS2IIoicq77frvhkeJKKiI7T1IUka1uiiJyrtW4EUVS2oIoisgOGBRFZKuboohs3BRFZJs5pUhKWxBFEdkrLUUR2ROHoohsM6coIhs3pUhK9yRFEdkBg6KIbHVTFJGNm6KI7O8nKUVS2oIoisjGTVFENm6KIrK3RYoistVNKZLSPUlRRDZuiiJyzu9P23ElFZGtbooistVNKZIyV/ckQxE5Z08chiKy1U1RRHbAoCgiW92UIikdMCiKyLm+/4OY40oqIjtgUBSRHTAoisie3ZQiKW1BFEVkBwyKIrIfQCmKyFY3RRHZs5tSJKUtiKKInEuwBymupCKy1U1RRHbAoCgiW92UIil9waAoItuCKIrIfhGjKCLn7K9ZGIrIVjelSMqcgndaxZVURLYFURSRHTAoishWN0UR2bObUiSlLYiiiOyAQVFEtropisjGTVFEdp6kFEnpB1CKIrItiKKI7IBBUUS2uimKyJ7dlCIpbUEURWQHDIoisnFTFJH9+kBRRLa6KUVSOk9SFJFtQRRFZP/dKEUROXebOUMR2bgpRVK6JymKyLk7TzIUka1uiiKycVMUkY2bUiSle5KiiGzcFEVk46YoIhs3RRHZuClFUvpgRVFEtgVRFJGNm6KIbNwURWTjphRJ6Z6kKCIbN0UR2bgpisg+DlAUka1uSpGU7kmKIrJxUxSRjZuiiPwHf6FjEdBNKQ8AAAAASUVORK5CYII=" id="imageacf0d94f1f" transform="scale(1 -1) translate(0 -589.68)" x="444.24" y="-147.6" width="29.52" height="589.68"/> - - + @@ -4446,7 +4446,7 @@ L 3.5 0 - + @@ -4461,7 +4461,7 @@ L 3.5 0 - + @@ -4476,7 +4476,7 @@ L 3.5 0 - + @@ -4491,7 +4491,7 @@ L 3.5 0 - + @@ -4506,7 +4506,7 @@ L 3.5 0 - + @@ -4553,7 +4553,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/episodes.svg b/benchmarks/llm/results/report_assets/episodes.svg index 09d96447..1c55e00d 100644 --- a/benchmarks/llm/results/report_assets/episodes.svg +++ b/benchmarks/llm/results/report_assets/episodes.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:32.339841 + 2026-05-15T22:54:08.727780 image/svg+xml @@ -37,20 +37,20 @@ L 204.928281 21.558281 z " style="fill: #ffffff"/> - + +iVBORw0KGgoAAAANSUhEUgAAA0UAAARoCAYAAADKEn/TAAAejklEQVR4nO3Z+6/fhV3HcU75nkM5LdiyUqAsjNtgY0bJZDNKJmNZlugc2xTiMJoME37Q7QcTXTKNhmxGiZmLiTcSiZmXMMjcEh0s25i6m8ENiNsQAhQhtRkXLbeOrpfT9hz/ivad8Hw8/oJXPvnm+/0+P++lO065bOMUAACAqE3TAwAAACaJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBtMT2g6Pxd0wt6nn5megGceO/77QunJ+R89fY90xNyzto+vaBnZXl6Qc/jT0wv6HEpAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACAtKXvv/snNqZH1Ox95JXpCTkry9MLeh5/YnpBz9XvXJ2ekLN2YG16Qs6ON75mekLOFz/1v9MTcnyfn3wuRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANKW7jjlso3pETUfuOft0xNy7vr5r01PAF6FrjnwkekJOV/f+onpCTnn75pe0HP1J946PSHHpQgAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAEDa4v0fu2x6Q84Xbvza9IScn73pnOkJOdtuuW56Qs7xb3x3ekLO0+/7y+kJcMIdOza9oOfAv+2dnpDjUgQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBtsXjtGdMbcna8ZnpBz+mv3z49IefOC2+fnpDzgQevn56Qc841P5iekPPeNx2anpBz3x17pyfkbL5k2/SEHJciAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIW6zvPzK9IWfT0vSCnqXNi+kJcMItXXT59ISc4y89MD0h5/iRY9MT4IRbeccbpyfkuBQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIW3zrTx6e3pBz1Q0XTE/IWTrt1OkJcMJtPPK96Qk5y5dsm56Qs/T9V6Yn5PzYtWdNT+hZOzq9IMelCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQNriJz/8hukNOc995X+mJ+Rc8N4rpicEfW96QM6jH7pnekLOxf/5yekJOZsf+Mb0hJxv/8I/T0/Iufpn9k5PyHEpAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkLZYf2VtekPOBZ98z/SEnKPfeHR6ApxwV9x98/SEnEO/9xfTE3IOb1qanpBz4eWbpyfkHHpo3/SEHJciAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIWyy/7szpDTkP/OJnpifkvPXJ26Yn9Pzmr08v6Dlj5/SCnNN/94PTE3L2XHfr9ISczdtPm56Qs9i8mJ6Q41IEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgbXHqxWdPb8g57/VbpifkrK0fnp6Qc+Oem6cn5Kx/9avTE3IOXffe6Qk5h1/2fX6y7X3CMz/ZLjp4bHpCjksRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACAtMUpu3ZOb8jZsnN1ekLO+h/eNj0h585bdk9PyLnxh388PSFn9bHvTk/IOe+qc6cn5JxzdH16Qs6Rv7ppekKOSxEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIC0peP3fmhjekTNXe/6yvSEnGsOfGR6Qs7Xt35iekLO1e9cnZ6Qc96Xbp2ekLO84X3uybbxhc9MT+jZsW16QY5vFgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASFusfXvv9IacSy6aXtCzsbE+PQFOuAvu/eT0hJwXjzw3PSHnhcPPTE/IOeNvH5qekHPOx39uekKOSxEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIC0xf6H9k1vyNm6dXpBz0PP756eALwKPf7S49MTcnaubp+ekLPtR8+enpCztGPn9IQclyIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpix2/9IbpDTnLn//v6Qk5K9t2TU/Iee3HLpue0LPvqekFOeeuvmZ6Qs5Fmy+enpDz/JP/OD0hZ/PawekJOS5FAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0hab3vrj0xtyVh99YXpCzvb969MTcu68Zff0hJwbP7wyPSHn/K27pifkvOfuO6cn5Pz9snfoJ9vGDw5MT8jxKQcAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgLTF7+x9ZHpDzu8/f2h6Qs76BVdOT4AT7umVw9MTcs5fWpmekPP5N10xPSHn+buenJ6Qs3Tm1ukJOS5FAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0ha3/vSN0xty1p/7h+kJcMLduOfm6Qk9Wy6dXpCz8dDXpifkHP3Kw9MTcrbsXJ2ekHPgvIunJ+S4FAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEhbfHr3Z6c35Lz7i09NT8g584r/mJ7Qs2XL9IKco+tr0xNyNn3nyekJOS/f/+z0hJyzb7pyekLOfc9/Z3pCjksRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACAtMU5q2dOb8g5/NLh6Qk5u89an56Qc9WDj09PyHl5x47pCTlnX//+6Qk5O6/dMz0hZ2P3nukJOVcvXzQ9IcelCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0ha//LmHpzfk/Pvf3DA9IectK5dOT8g5+OVvTk/IufP8pekJOb92++7pCTnrf/ob0xNyNv7g7ukJOVue2jc9IcelCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQNrSr37pgxvTI2pu+fgD0xNynv7M+6Yn5Lxt25unJ+Qc3bw6PSFneWkxPSFnz4HHpifkvO6RvdMTcvbf/q3pCTkuRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANIWf/eWa6c35Gx8dHl6Qs6zS0vTE3I2Hvzm9IScIz/1tukJOctr69MTclZO3Tw9IWfjmX3TE3J++Ge/Mj0hx6UIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABA2mJ9+67pDTn7P/vo9IScC6+9anpCztIbvXM52bYe88xPutNWpxfk/Pn9X56ekPNHV14+PSHn6Pra9IQcv6AAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJC2eNfnPjW9Iedfbvut6Qk59++7b3pCzq6njkxPyPmnC6YX9Ow8/czpCTlXnr0yPaHn4OHpBTkXbrp0ekKOSxEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIC0xf4jx6Y39Bw5ML0gZ3WxMj0h596Lpxf0vOPcq6Yn5PzIIb+hJ9vxc3dMT8h59tDe6Qk5r6w9NT0hx6UIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABA2mLfwaPTG3LWzjxrekLO1dvePj0h55EX7p+ekHPbf/3r9IScj775hukJOcc31qYn5Bw5dnB6Qs4ZK/4rnmwuRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANIWK6cuTW/IWdn//PSEnCc2/d/0hJxnfvjy9ISc6y+9fHpCzuH1w9MTcv764XumJ+S8+6I3TE/I2b6yc3pCjksRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkLaYHFL246rGfbC++fGB6Apxwpy+2Tk/IWTl18/SEnKcPHJ+ekHPWaedOT8h57uCe6Qk5LkUAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSFmevLk9vyNm6vG16Qs5iSf/z6nf4+MHpCTmbjnjmJ9umpekFPbtffmx6Qs6eH+ybnpDjnyIAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIA0UQQAAKSJIgAAIE0UAQAAaaIIAABIE0UAAECaKAIAANJEEQAAkCaKAACANFEEAACkiSIAACBNFAEAAGmiCAAASBNFAABAmigCAADSRBEAAJAmigAAgDRRBAAApIkiAAAgTRQBAABpoggAAEgTRQAAQJooAgAA0kQRAACQJooAAIC0/wfFiYLMWX9uHQAAAABJRU5ErkJggg==" id="imaged526bbcee9" transform="scale(1 -1) translate(0 -812.16)" x="204.928281" y="-21.557283" width="602.64" height="812.16"/> - - + @@ -360,7 +360,7 @@ z - + @@ -392,7 +392,7 @@ z - + @@ -625,7 +625,7 @@ z - + @@ -649,7 +649,7 @@ z - + @@ -720,7 +720,7 @@ z - + @@ -774,7 +774,7 @@ z - + @@ -834,7 +834,7 @@ z - + @@ -897,7 +897,7 @@ z - + @@ -920,12 +920,12 @@ z - - + @@ -1060,7 +1060,7 @@ z - + @@ -1086,7 +1086,7 @@ z - + @@ -1125,7 +1125,7 @@ z - + @@ -1151,7 +1151,7 @@ z - + @@ -1184,7 +1184,7 @@ z - + @@ -1205,7 +1205,7 @@ z - + @@ -1246,7 +1246,7 @@ z - + @@ -1317,7 +1317,7 @@ z - + @@ -1350,7 +1350,7 @@ z - + @@ -1414,7 +1414,7 @@ z - + @@ -1446,7 +1446,7 @@ z - + @@ -1483,7 +1483,7 @@ z - + @@ -1511,7 +1511,7 @@ z - + @@ -1550,7 +1550,7 @@ z - + @@ -1579,7 +1579,7 @@ z - + @@ -1631,7 +1631,7 @@ z - + @@ -1671,7 +1671,7 @@ z - + @@ -1708,7 +1708,7 @@ z - + @@ -1753,7 +1753,7 @@ z - + @@ -1789,7 +1789,7 @@ z - + @@ -1823,7 +1823,7 @@ z - + @@ -1858,7 +1858,7 @@ z - + @@ -1890,7 +1890,7 @@ z - + @@ -1926,7 +1926,7 @@ z - + @@ -1976,7 +1976,7 @@ z - + @@ -2016,7 +2016,7 @@ z - + @@ -2054,7 +2054,7 @@ z - + @@ -2098,7 +2098,7 @@ z - + @@ -2139,7 +2139,7 @@ z - + @@ -2165,7 +2165,7 @@ z - + @@ -2192,7 +2192,7 @@ z - + @@ -5336,18 +5336,18 @@ z " style="fill: #ffffff"/> +iVBORw0KGgoAAAANSUhEUgAAACIAAAKlCAYAAACubT6LAAAENElEQVR4nO2dwZHbUAzFKJu1pPOUaWkPmRw+tgDjAFSAIR8pSt5Mrr/z5xkBr28L/Ecjsi+JikRDJFJrSBUhGpFaQzQi+7q+rfAPUUUkKhINkUitIRqRWkOqCNGI1BpSRYhGpNYQjchel+N69lSksAKNSK0hGpFaQ6oI0YjUGqIRqTWkihCNSK0hGpF+FCCFlWhEag2pIkQjsi/JatVURCOy17vWHOxVWE80IrWGaETaI6SwEo1IYSUakU5F0h4hGpH2CNGI7CX5HOCwmML6G41IK55UEaIRaY8QjUjHM2mPEI1Ie4RoRJoaUliJRqQ3PaIRaY+QfbVHTjQihZVoRHr6ksJKNCI7hfWksBKNyM7b4eKwGJHITlNz0hlANCKFlWhEukdI9wjRiHSPEI1IU0ParEQjUmuIRqQ9Qgor0Yjs9OPiSS/hRCPSSzjRiLTiSWcA0Yj0Ek40Iu0R0h4hGpHCSjQinYqk9xqiEeklnGhE2iOksBKNSK0hjS/RiBRWohGpNaQ9QjQihZVoRGoNaY8QjUhhJRqRWkP6L8qIRqSwEo1IrSHdI0QjUliJRqSpIYWVaERqDWl8iUaksBKNSK0h7RGiESmsRCOyczlcHBZTRX6jEWmPEI1IU0OqCNGI1BqiEWnFk8JKNCK1hlQRohGpNUQj0kOP7HW9v+0wM6KKaETaI0QjUmtIFSEakR56RCPS1JAqQjQitYZoRFrxpLASjUitIVWEaERqDdGI7OP4MzRTRZ772w4zI6qIRmTvWnOiEdlnas1BYSUakcJKNCJNDekeIRqRvdsjJxqRpoa0WYlGpHuEFFaiEdlnPt92mBlRRTQiTQ3pDCAakY5nohFpakiblWhEOp6JRqSpIYWVaEQKK9GIdI+QjmeiESmsZO/n+bbDzIgqohHpoUc0Ik0N2XuqyIFGpLASjUgrnhRWohGpNUQj0h4hhZVoRDoViUakPULaI0QjUmuIRqTWkDYr0YgUVlJFiEakHxeJRqSpIVWEaERqDdGIdCqSwko0IrWGaERqDdmPoyCeimhECivRiDQ1ZO8qcqIR2U975KSwEo1Im5VoRJoa0mYlGpG9HR+MPBXRiLTiSZuVaERa8UQj0tSQNivRiPQxj2hEmhrSZiUakcJK9pakVVMRjUhhJRqRVjwprEQj0vFMNCJNDemLEdGI7KcfBU40Ik0N2Y/kRNNURCNSWIlGpBVPCivRiLTiSWElGpE2K9GINDWkihCNSA89ohHpYx4prEQj0oonGpGmhrRZiUaksBKNSCueVBGiEen7CCmsRCPSQ49oRJoa0r/lJBqRfdojJxqRWkOqCNGI7NND70Qj0tSQKkI0IrWGaERqDakiRCNSa0gVIRqRWkM0Ir3XkMJKNCK1hmhEag2pIkQjUmuIRqTWkCpCNCIdz0Qj0tSQKkI0IrWGaERqDfkBUaBh9bAbYHwAAAAASUVORK5CYII=" id="image11f9a95b8b" transform="scale(1 -1) translate(0 -487.44)" x="844.56" y="-183.6" width="24.48" height="487.44"/> - - + @@ -5362,7 +5362,7 @@ L 3.5 0 - + @@ -5377,7 +5377,7 @@ L 3.5 0 - + @@ -5392,7 +5392,7 @@ L 3.5 0 - + @@ -5407,7 +5407,7 @@ L 3.5 0 - + @@ -5422,7 +5422,7 @@ L 3.5 0 - + @@ -5514,7 +5514,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/latency_tail.svg b/benchmarks/llm/results/report_assets/latency_tail.svg index f9876340..51677428 100644 --- a/benchmarks/llm/results/report_assets/latency_tail.svg +++ b/benchmarks/llm/results/report_assets/latency_tail.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:32.801562 + 2026-05-15T22:54:09.404858 image/svg+xml @@ -43,7 +43,7 @@ L 230.967827 1163.660463 L 230.967827 1156.81948 L -158303.89101 1156.81948 z -" clip-path="url(#pebf67da638)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #1f77b4; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: #d62728; stroke: #000000; stroke-width: 0.3; stroke-linejoin: miter"/> +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -1128,11 +1128,11 @@ z +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1148,11 +1148,11 @@ L 407.339082 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1194,11 +1194,11 @@ z +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1248,16 +1248,16 @@ z +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -1265,11 +1265,11 @@ L 0 2 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1277,11 +1277,11 @@ L 224.226284 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1289,11 +1289,11 @@ L 233.421063 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1301,11 +1301,11 @@ L 241.531432 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1313,11 +1313,11 @@ L 296.515516 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1325,11 +1325,11 @@ L 324.435257 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1337,11 +1337,11 @@ L 344.244628 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1349,11 +1349,11 @@ L 359.60997 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1361,11 +1361,11 @@ L 372.164369 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1373,11 +1373,11 @@ L 382.778962 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1385,11 +1385,11 @@ L 391.97374 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1397,11 +1397,11 @@ L 400.084109 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1409,11 +1409,11 @@ L 455.068194 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1421,11 +1421,11 @@ L 482.987934 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1433,11 +1433,11 @@ L 502.797306 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1445,11 +1445,11 @@ L 518.162648 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1457,11 +1457,11 @@ L 530.717046 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1469,11 +1469,11 @@ L 541.331639 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1481,11 +1481,11 @@ L 550.526417 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1493,11 +1493,11 @@ L 558.636787 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1505,11 +1505,11 @@ L 613.620871 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1517,11 +1517,11 @@ L 641.540612 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1529,11 +1529,11 @@ L 661.349983 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1541,11 +1541,11 @@ L 676.715325 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1553,11 +1553,11 @@ L 689.269724 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1565,11 +1565,11 @@ L 699.884316 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1577,11 +1577,11 @@ L 709.079095 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1589,11 +1589,11 @@ L 717.189464 21.558281 +" clip-path="url(#p56ed1181c7)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -2022,12 +2022,12 @@ z - - + @@ -2173,7 +2173,7 @@ z - + @@ -2248,7 +2248,7 @@ z - + @@ -2305,7 +2305,7 @@ z - + @@ -2382,7 +2382,7 @@ z - + @@ -2422,7 +2422,7 @@ z - + @@ -2491,7 +2491,7 @@ z - + @@ -2556,7 +2556,7 @@ z - + @@ -2588,7 +2588,7 @@ z - + @@ -2625,7 +2625,7 @@ z - + @@ -2682,7 +2682,7 @@ z - + @@ -2743,7 +2743,7 @@ z - + @@ -2776,7 +2776,7 @@ z - + @@ -2802,7 +2802,7 @@ z - + @@ -2852,7 +2852,7 @@ z - + @@ -2886,7 +2886,7 @@ z - + @@ -2913,7 +2913,7 @@ z - + @@ -2940,7 +2940,7 @@ z - + @@ -2980,7 +2980,7 @@ z - + @@ -3018,7 +3018,7 @@ z - + @@ -3060,7 +3060,7 @@ z - + @@ -3088,7 +3088,7 @@ z - + @@ -3114,7 +3114,7 @@ z - + @@ -3140,7 +3140,7 @@ z - + @@ -3161,7 +3161,7 @@ z - + @@ -3207,7 +3207,7 @@ z - + @@ -3277,7 +3277,7 @@ z - + @@ -3310,7 +3310,7 @@ z - + @@ -3347,7 +3347,7 @@ z - + @@ -3379,7 +3379,7 @@ z - + @@ -3431,7 +3431,7 @@ z - + @@ -3463,7 +3463,7 @@ z - + @@ -3984,7 +3984,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/pareto.svg b/benchmarks/llm/results/report_assets/pareto.svg index 505a5918..7da8190c 100644 --- a/benchmarks/llm/results/report_assets/pareto.svg +++ b/benchmarks/llm/results/report_assets/pareto.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:32.047453 + 2026-05-15T22:54:08.113712 image/svg+xml @@ -42,16 +42,16 @@ z +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -88,11 +88,11 @@ z +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -122,11 +122,11 @@ z +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -166,11 +166,11 @@ z +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -218,11 +218,11 @@ z +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -257,11 +257,11 @@ z +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -703,16 +703,16 @@ z +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -737,11 +737,11 @@ z +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -757,11 +757,11 @@ L 724.90125 201.19427 +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -777,11 +777,11 @@ L 724.90125 163.811383 +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -797,11 +797,11 @@ L 724.90125 126.428496 +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -817,11 +817,11 @@ L 724.90125 89.045609 +" clip-path="url(#p062668962a)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1103,7 +1103,7 @@ L 724.90125 22.318125 - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + @@ -1824,7 +1967,7 @@ z - + @@ -2008,7 +2151,7 @@ z - + @@ -2056,7 +2199,7 @@ z - + @@ -2156,7 +2299,7 @@ z - + @@ -2204,7 +2347,7 @@ z - + @@ -2291,7 +2434,7 @@ z - + @@ -2334,7 +2477,7 @@ z - + @@ -2412,7 +2555,7 @@ z - + @@ -2505,7 +2648,7 @@ z - + @@ -2560,7 +2703,7 @@ z - + @@ -2614,7 +2757,7 @@ z - + @@ -2668,7 +2811,7 @@ z - + @@ -2727,7 +2870,7 @@ z - + @@ -2777,7 +2920,7 @@ z - + @@ -2838,7 +2981,7 @@ z - + @@ -2889,7 +3032,7 @@ z - + @@ -2963,7 +3106,7 @@ z - + @@ -3025,7 +3168,7 @@ z - + @@ -3084,7 +3227,7 @@ z - + @@ -3151,7 +3294,7 @@ z - + @@ -3209,7 +3352,7 @@ z - + @@ -3265,7 +3408,7 @@ z - + @@ -3322,7 +3465,7 @@ z - + @@ -3376,7 +3519,7 @@ z - + @@ -3434,7 +3577,7 @@ z - + @@ -3506,7 +3649,7 @@ z - + @@ -3568,7 +3711,7 @@ z - + @@ -3628,7 +3771,7 @@ z - + @@ -3694,7 +3837,7 @@ z - + @@ -3757,7 +3900,7 @@ z - + @@ -3805,7 +3948,7 @@ z - + @@ -3855,7 +3998,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/parser_stress.svg b/benchmarks/llm/results/report_assets/parser_stress.svg index f3f1266b..215925e0 100644 --- a/benchmarks/llm/results/report_assets/parser_stress.svg +++ b/benchmarks/llm/results/report_assets/parser_stress.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:34.106552 + 2026-05-15T22:54:11.687289 image/svg+xml @@ -37,20 +37,20 @@ L 204.928281 33.961813 z " style="fill: #ffffff"/> - + +iVBORw0KGgoAAAANSUhEUgAAA90AAARdCAYAAABM2SHQAAAd40lEQVR4nO3Zv8vudR3Hca9zX+f464hmJkZKgoVLRUEO0doYJEFLoTUEgW051VJNRVSjS4la4CKIY9TS0mJna4iG6Jf9Vg+lpufofX/bmlyC+3ne3Nfn8fgLXsPN576e3/du2y5t19G7cnl6wRr2N08vWMJT+y9MTzh4n9+enp4AAMApODc9AAAAAA6V6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAIDIfnrAKk6e+c70hCVsv3hxesISPr89PT0BAADOBJduAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiOynB6zi2w/9YXrCEr52/NT0BAAAgP9x6QYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAICI6AYAAIDIbtsubdMjgLPlb/d9enrCwbvrt89OT4BTs/3qx9MTlrD74EPTEwB4Gy7dAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAEBHdAAAAENlt26VtesQKXvrQZ6YnLOH2xz8xPWEJuwe+ND0BAADOBJduAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiIhuAAAAiOy27dI2PWIF2x9/Oj1hCX/+2GPTE5Zw9wvPTk84fG/9Z3rBGs5fnF4Ap+fkzekFazh3fnoBcMa4dAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBkPz1gFa8//OT0hCW850cfn56whp3vdbnzF6cXAGfNufPTCwB4G345AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQER0AwAAQGQ/PWAVNz758PSENVz51/QCOBXHX31kesISjr712PQEAODAuXQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABAZLdtl7bpEUt47a/TC5bw3MVHpycs4cHt6ekJh++t16cXrGF/4/QCAODAuXQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABAZD89YBXfv/jo9IQlfOXNH05PgNOxv3F6AQAAp8ClGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACK7bbu0TY8AgAnf3H12esISvr49PT0BAMa4dAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBEdAMAAEBkt508v02PWMIrv59esIad70jXxC33Ti84eC+858HpCUu4+8/PTU9Yw8nx9II1nDuaXrCGK5enF6zh+ndML4BTo1AAAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgIroBAAAgstuu/nybHrGC39z18PSEJdz/u+9OT1jDxXumFxy+11+cXrCGm989vQAAOHAu3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABAR3QAAABDZX3f1lekNS3j/N+6dnrCE7Y3L0xPW8JfnpxccvN19n5yesIbtZHrBGna+8QOwLv8FAQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAICK6AQAAILKfHrCK3Ufvm56whN27PjI9YQ3v/MD0goN3/MVHpics4eiJx6cnAAAHzqUbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIqIbAAAAIvvt5V9Pb1jC9pe/TU9Ywx9/Nr1gDds2veDgHT3x+PQEAABOgUs3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAAREQ3AAAARPbbT38yvQFOze62+6YnrOHW900vAACAM8GlGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACKiGwAAACL76944nt6whrsvTi9Yww13TC9Yw4l3I/fmv6cXrOH6d0wvWIM349o4uTK9YA37m6YXAGeMSzcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABE9tvLV6Y3LOHoy9+bngCnZvvHL6cnHLzdnQ9MT1jCdukH0xOWsPvw56YnrGF/0/QCAN6GSzcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABERDcAAABEdn+//73b9IgVvOuZB6cnLGH3wYemJ8DpOHlzesEazp2fXgAAHDiXbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIiIbgAAAIjsX3hBd18Ld97/qekJa9hOphes4dU/TS84fBdunV6whutvm14AABw4xQ0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAACR/cuvnZ/esIaTq9ML1nB8ZXrBGm6+e3rB4Tt3NL1gDcfe5mvi6ML0gjWcHE8vWIP3Gfg/uXQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABAZH8yvWAVN9wxvQBOz6t/ml5w+C7eM71gDUcXphes4fjq9II1+HvmkHg3rg3vxjXh0g0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAACR/T+nFwBnz8V7phfA6Xjj5ekFa9jfOL0AOGuOLkwvgFPj0g0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAACR/eXpBcDZs51MLzh8O99Er4kbbp9esIarr0wvAIAxftUBAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABARHQDAABAZP/S9ALg7Nn5Xgf8Hy7cMr0AAMb45QwAAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAAAR0Q0AAACR/wKCC8NHBLsQDwAAAABJRU5ErkJggg==" id="imagea7acb07b6c" transform="scale(1 -1) translate(0 -804.24)" x="204.928281" y="-33.285347" width="712.08" height="804.24"/> - - + @@ -351,7 +351,7 @@ z - + @@ -453,7 +453,7 @@ z - + @@ -483,7 +483,7 @@ z - + @@ -579,7 +579,7 @@ z - + @@ -626,7 +626,7 @@ z - + @@ -705,7 +705,7 @@ z - + @@ -750,7 +750,7 @@ z - + @@ -786,7 +786,7 @@ z - + @@ -814,7 +814,7 @@ z - + @@ -857,7 +857,7 @@ z - + @@ -898,17 +898,17 @@ z - - + - - + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + - + - + - + - + + - + - + - + - @@ -1212,50 +1254,10 @@ z - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + @@ -1318,7 +1320,7 @@ z - + @@ -1364,7 +1366,7 @@ z - + @@ -1403,7 +1405,7 @@ z - + @@ -1436,7 +1438,7 @@ z - + @@ -1497,7 +1499,7 @@ z - + @@ -1577,7 +1579,7 @@ z - + @@ -1609,7 +1611,7 @@ z - + @@ -1646,7 +1648,7 @@ z - + @@ -1680,7 +1682,7 @@ z - + @@ -1732,7 +1734,7 @@ z - + @@ -1759,7 +1761,7 @@ z - + @@ -1803,7 +1805,7 @@ z - + @@ -1848,7 +1850,7 @@ z - + @@ -1886,7 +1888,7 @@ z - + @@ -1918,7 +1920,7 @@ z - + @@ -1968,7 +1970,7 @@ z - + @@ -2004,77 +2006,77 @@ z - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + - + - - + + @@ -2090,14 +2092,27 @@ z - + + + + + + + + + + + + + + - + @@ -2130,44 +2145,33 @@ z - + - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - + @@ -2188,12 +2192,51 @@ z - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -2231,15 +2274,15 @@ z - - + + - + - + - + @@ -2263,55 +2306,14 @@ z - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + @@ -2326,19 +2328,19 @@ z - + - + - - + + @@ -2346,13 +2348,13 @@ z - - - - - - - + + + + + + + @@ -2989,59 +2991,51 @@ L 916.664281 33.961813 - - - - - + + + + + - - - - - + + + + + - - - - + + + + - - - - + + + + - - + + - - + + - - - - - - - - - - + + - + - + @@ -3049,7 +3043,7 @@ L 916.664281 33.961813 - + @@ -3057,7 +3051,7 @@ L 916.664281 33.961813 - + @@ -3065,18 +3059,26 @@ L 916.664281 33.961813 - + + + + + + + + + - - - + + + - + @@ -3115,99 +3117,99 @@ L 916.664281 33.961813 + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - - - - + + + + - - - - - + + + + + - - - - - + + + - - - - - + + + + - - - - - + + + + - - - + + + + + @@ -3878,18 +3880,18 @@ z " style="fill: #ffffff"/> +iVBORw0KGgoAAAANSUhEUgAAACcAAAMOCAYAAACd6B9WAAAE+klEQVR4nO2dwXHDQAzE6Az7ryw1ianAeeAjPIAKMGJ2eTo7yefu90bKz9sC/6GW2zntVN1PTi23M8/bDl9RPzm1XGmlqOVKKyU5SlVCUcvtTGNFqOV2rg2BUMtVwhS1XCVMUctVwhS1XGmlqOVKK0UtV1oparlOwhS1XDebFLVcaaWo5RorRS1XCVPUcqWVopYrrRS1XGmlqOW6jqCo5bo8pCRHaUNQ1HKd5yhqudJKUct1nqOo5TrPUdRypZWilmu3UtRylTBFLVcJU9RylTBFLdd1BEUtV1oparnGSlHLVcIUtVxppajlesGhqOW6jqCo5SphilquEqao5SphSnKUNgRFLdeGoKjl2hAUtVxppajl9tqtDLVcJUxRy1XCFLVcLzgUtVwlTFHLVcIUtVwlTFHLNVaKWq5v5FDUcqWVopbryERRy/WCQ1HLVcIUtVwlTFHLVcKU5ChtCIparg1BUcvtPG0IhFquEqao5RorRS3XMZ2iliutFLXcztNYEWq5SpiilquEKWq5xkpRy3UdQVHLlVaKWq6xUtRyjZWilusFh6KW6zqCoparhClquUqYkhylKqGo5Vr8FLVcG4KilquEKWq5xkpRy1XCFLVcRyaKWq4SpqjlKmGKWq60UtRy7VaKWq4SpqjlKmGKWq6xUtRyjZWiluvIRFHLrfg35d1PTi1XCVPUcqWVopYrrZTkKFUJRS3XMZ2iliutFLVci5+iliutFLVcaaWo5UorRS3XSZiiliutFLVcu5WillvxVN1PTi1XWilquXYrRS1XWilquRX/50D3k1PLlVaKWq7dSlHLlVaKWq60UpKjVCUUtVzHdIparrRS1HItfoparrRS1HKllaKWK60UtVwnYYpari9aUdRypZWiliutFLVcY6Wo5SphilqutFLUcqWVopYrrRS1XGmlqOVKK0UtV1opyVGqEoparrFS1HJtCIparrRS1HIr/lP97ienliutFLVcu5WiliutFLVcY6Wo5fq2K0UtV1oparmOTBS1XGmlqOVKK0UtV1oparnGSlHLVcIUtVxppajlSitFLVdaKclRqhKKWm6vKmGo5fqj7hS1XGmlqOX6ohVFLdeHwRS1XCVMUct1ZKKo5UorRS3XkYmiluvPClLUcqWVopZrt1LUcp2EKWq5PsGhqOX2xHFVPzm1XEcmilquIxNFLdeRiZIcpW+BUdRyLX6KWq4NQVHLdZ6jqOUqYYparo+XKGq50kpRy3WzSVHLlVaKWq7rCIparvdWilquEqao5bqOoKjlKmGKWm5vPm87fEX95NRypZWilmusFLXcik9M7ienltu7disiOUobgqKWW/FU3U9OLdeGoKjlOs9R1HLtVoparhKmqOU6MlHUcpUwRS23TyXMUMuVVoparrFS1HJ9GExRy5VWilquWyaKWq7rCIparhKmqOUaK0Ut1+UhRS3XLRNFLVcJU5Kj9DNHUcv19kVRy3Weo6jlKmGKWq5jOkUtV1oparkV/xq/+8mp5RorRS3XWClquV5wKGq50kpRyzVWilqusVLUco2VopZrrBS1XGOlqOUaK0Ut11gparnGSkmO0s8cRS3XzSZFLVdaKWq5xkpRyzVWilqusVLUco2VopZrrBS1XGOlqOUaK0Ut11gparnGSlHLNVaKWq47YYparrRS1HKNlaKWa6wUtVxjpajlGislOcofcTq0zmoU5BAAAAAASUVORK5CYII=" id="image4976f98383" transform="scale(1 -1) translate(0 -563.04)" x="961.2" y="-154.08" width="28.08" height="563.04"/> - - + @@ -3902,7 +3904,7 @@ L 3.5 0 - + @@ -3917,7 +3919,7 @@ L 3.5 0 - + @@ -3932,7 +3934,7 @@ L 3.5 0 - + @@ -3947,7 +3949,7 @@ L 3.5 0 - + @@ -3962,7 +3964,7 @@ L 3.5 0 - + @@ -3977,7 +3979,7 @@ L 3.5 0 - + @@ -4023,7 +4025,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/precision_recall.svg b/benchmarks/llm/results/report_assets/precision_recall.svg index 040e853f..194fd648 100644 --- a/benchmarks/llm/results/report_assets/precision_recall.svg +++ b/benchmarks/llm/results/report_assets/precision_recall.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:33.130268 + 2026-05-15T22:54:10.100902 image/svg+xml @@ -42,16 +42,16 @@ z +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -97,11 +97,11 @@ z +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -143,11 +143,11 @@ z +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -184,11 +184,11 @@ z +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -236,11 +236,11 @@ z +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -297,11 +297,11 @@ z +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -789,16 +789,16 @@ z +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -814,11 +814,11 @@ L -3.5 0 +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -834,11 +834,11 @@ L 724.90125 212.756702 +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -854,11 +854,11 @@ L 724.90125 169.141317 +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -874,11 +874,11 @@ L 724.90125 125.525933 +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -894,11 +894,11 @@ L 724.90125 81.910548 +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1089,7 +1089,7 @@ L 131.420242 76.717249 L 130.461002 57.998797 L 129.648942 38.295163 L 129.648942 38.295163 -" clip-path="url(#p178a3923db)" style="fill: none; stroke-dasharray: 2.22,0.96; stroke-dashoffset: 0; stroke: #808080; stroke-opacity: 0.5; stroke-width: 0.6"/> +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke-dasharray: 2.22,0.96; stroke-dashoffset: 0; stroke: #808080; stroke-opacity: 0.5; stroke-width: 0.6"/> +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke-dasharray: 2.22,0.96; stroke-dashoffset: 0; stroke: #808080; stroke-opacity: 0.5; stroke-width: 0.6"/> +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke-dasharray: 2.22,0.96; stroke-dashoffset: 0; stroke: #808080; stroke-opacity: 0.5; stroke-width: 0.6"/> +" clip-path="url(#peadb4b1c21)" style="fill: none; stroke-dasharray: 2.22,0.96; stroke-dashoffset: 0; stroke: #808080; stroke-opacity: 0.5; stroke-width: 0.6"/> - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + @@ -2504,7 +2647,7 @@ z - + @@ -2725,7 +2868,7 @@ z - + @@ -2776,7 +2919,7 @@ z - + @@ -2860,7 +3003,7 @@ z - + @@ -2911,7 +3054,7 @@ z - + @@ -2957,7 +3100,7 @@ z - + @@ -3015,7 +3158,7 @@ z - + @@ -3088,7 +3231,7 @@ z - + @@ -3155,7 +3298,7 @@ z - + @@ -3241,7 +3384,7 @@ z - + @@ -3316,7 +3459,7 @@ z - + @@ -3373,7 +3516,7 @@ z - + @@ -3435,7 +3578,7 @@ z - + @@ -3488,7 +3631,7 @@ z - + @@ -3552,7 +3695,7 @@ z - + @@ -3629,7 +3772,7 @@ z - + @@ -3683,7 +3826,7 @@ z - + @@ -3748,7 +3891,7 @@ z - + @@ -3810,7 +3953,7 @@ z - + @@ -3880,7 +4023,7 @@ z - + @@ -3939,7 +4082,7 @@ z - + @@ -4000,7 +4143,7 @@ z - + @@ -4060,7 +4203,7 @@ z - + @@ -4117,7 +4260,7 @@ z - + @@ -4192,7 +4335,7 @@ z - + @@ -4253,7 +4396,7 @@ z - + @@ -4318,7 +4461,7 @@ z - + @@ -4381,7 +4524,7 @@ z - + @@ -4450,7 +4593,7 @@ z - + @@ -4516,7 +4659,7 @@ z - + @@ -4567,7 +4710,7 @@ z - + @@ -4620,7 +4763,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/token_efficiency.svg b/benchmarks/llm/results/report_assets/token_efficiency.svg index fe75ba55..ec054db6 100644 --- a/benchmarks/llm/results/report_assets/token_efficiency.svg +++ b/benchmarks/llm/results/report_assets/token_efficiency.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:33.482098 + 2026-05-15T22:54:10.625272 image/svg+xml @@ -42,16 +42,16 @@ z +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -128,11 +128,11 @@ z +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -182,11 +182,11 @@ z +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -223,16 +223,16 @@ z +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -240,11 +240,11 @@ L 0 2 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -252,11 +252,11 @@ L 69.805327 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -264,11 +264,11 @@ L 86.033359 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -276,11 +276,11 @@ L 100.090719 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -288,11 +288,11 @@ L 112.49019 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -300,11 +300,11 @@ L 196.552151 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -312,11 +312,11 @@ L 239.237014 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -324,11 +324,11 @@ L 269.522407 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -336,11 +336,11 @@ L 293.013582 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -348,11 +348,11 @@ L 312.20727 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -360,11 +360,11 @@ L 328.435302 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -372,11 +372,11 @@ L 342.492663 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -384,11 +384,11 @@ L 354.892134 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -396,11 +396,11 @@ L 438.954094 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -408,11 +408,11 @@ L 481.638957 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -420,11 +420,11 @@ L 511.92435 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -432,11 +432,11 @@ L 535.415525 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -444,11 +444,11 @@ L 554.609213 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -456,11 +456,11 @@ L 570.837245 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -468,11 +468,11 @@ L 584.894606 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -480,11 +480,11 @@ L 597.294077 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -492,11 +492,11 @@ L 681.356037 34.019562 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1010,16 +1010,16 @@ z +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -1044,11 +1044,11 @@ z +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1064,11 +1064,11 @@ L 724.90125 202.056486 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1084,11 +1084,11 @@ L 724.90125 161.07187 +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1136,11 +1136,11 @@ z +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1197,11 +1197,11 @@ z +" clip-path="url(#pfd119c6915)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1328,7 +1328,7 @@ L 724.90125 34.019562 - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + - - + + + + + @@ -2572,7 +2715,7 @@ z - + @@ -2706,7 +2849,7 @@ z - + @@ -2753,7 +2896,7 @@ z - + @@ -2832,7 +2975,7 @@ z - + @@ -2879,7 +3022,7 @@ z - + @@ -2934,7 +3077,7 @@ z - + @@ -2977,7 +3120,7 @@ z - + @@ -3040,7 +3183,7 @@ z - + @@ -3132,7 +3275,7 @@ z - + @@ -3185,7 +3328,7 @@ z - + @@ -3239,7 +3382,7 @@ z - + @@ -3293,7 +3436,7 @@ z - + @@ -3352,7 +3495,7 @@ z - + @@ -3401,7 +3544,7 @@ z - + @@ -3461,7 +3604,7 @@ z - + @@ -3510,7 +3653,7 @@ z - + @@ -3584,7 +3727,7 @@ z - + @@ -3644,7 +3787,7 @@ z - + @@ -3701,7 +3844,7 @@ z - + @@ -3766,7 +3909,7 @@ z - + @@ -3822,7 +3965,7 @@ z - + @@ -3877,7 +4020,7 @@ z - + @@ -3932,7 +4075,7 @@ z - + @@ -3984,7 +4127,7 @@ z - + @@ -4040,7 +4183,7 @@ z - + @@ -4111,7 +4254,7 @@ z - + @@ -4171,7 +4314,7 @@ z - + @@ -4231,7 +4374,7 @@ z - + @@ -4295,7 +4438,7 @@ z - + @@ -4356,7 +4499,7 @@ z - + @@ -4405,7 +4548,7 @@ z - + @@ -4454,7 +4597,7 @@ z - + diff --git a/benchmarks/llm/results/report_assets/trial_variance.svg b/benchmarks/llm/results/report_assets/trial_variance.svg index 90b024da..c7621cb1 100644 --- a/benchmarks/llm/results/report_assets/trial_variance.svg +++ b/benchmarks/llm/results/report_assets/trial_variance.svg @@ -6,7 +6,7 @@ - 2026-05-15T20:20:33.651683 + 2026-05-15T22:54:11.004978 image/svg+xml @@ -43,7 +43,7 @@ L 226.120313 836.931009 L 226.120313 817.395274 L 226.120313 817.395274 z -" clip-path="url(#p3528eab364)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #2ca02c; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #f0a020; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: #d62728; stroke: #000000; stroke-width: 0.4; stroke-linejoin: miter"/> +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - - + @@ -355,11 +355,11 @@ z +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -428,11 +428,11 @@ z +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -450,11 +450,11 @@ L 352.607155 21.558281 +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -484,11 +484,11 @@ z +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -522,11 +522,11 @@ z +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -544,11 +544,11 @@ L 542.337418 21.558281 +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -566,11 +566,11 @@ L 605.580839 21.558281 +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -588,11 +588,11 @@ L 668.82426 21.558281 +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke: #b0b0b0; stroke-opacity: 0.3; stroke-width: 0.8; stroke-linecap: square"/> - + @@ -1072,12 +1072,12 @@ z - - + @@ -1202,7 +1202,7 @@ z - + @@ -1298,7 +1298,7 @@ z - + @@ -1370,7 +1370,7 @@ z - + @@ -1436,7 +1436,7 @@ z - + @@ -1476,7 +1476,7 @@ z - + @@ -1515,7 +1515,7 @@ z - + @@ -1576,7 +1576,7 @@ z - + @@ -1639,7 +1639,7 @@ z - + @@ -1667,7 +1667,7 @@ z - + @@ -1700,7 +1700,7 @@ z - + @@ -1782,7 +1782,7 @@ z - + @@ -1822,7 +1822,7 @@ z - + @@ -1849,7 +1849,7 @@ z - + @@ -1885,7 +1885,7 @@ z - + @@ -1911,7 +1911,7 @@ z - + @@ -1956,7 +1956,7 @@ z - + @@ -1983,7 +1983,7 @@ z - + @@ -2033,7 +2033,7 @@ z - + @@ -2059,7 +2059,7 @@ z - + @@ -2092,7 +2092,7 @@ z - + @@ -2128,7 +2128,7 @@ z - + @@ -2198,7 +2198,7 @@ z - + @@ -2236,7 +2236,7 @@ z - + @@ -2278,7 +2278,7 @@ z - + @@ -2322,7 +2322,7 @@ z - + @@ -2354,7 +2354,7 @@ z - + @@ -2391,7 +2391,7 @@ z - + @@ -2417,7 +2417,7 @@ z - + @@ -2449,7 +2449,7 @@ z - + @@ -2501,7 +2501,7 @@ z - + @@ -2522,7 +2522,7 @@ z - + @@ -2557,12 +2557,12 @@ z +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke-dasharray: 0.8,1.32; stroke-dashoffset: 0; stroke: #808080; stroke-opacity: 0.7; stroke-width: 0.8"/> +" clip-path="url(#p6ad0602a81)" style="fill: none; stroke-dasharray: 0.8,1.32; stroke-dashoffset: 0; stroke: #808080; stroke-opacity: 0.7; stroke-width: 0.8"/> + diff --git a/benchmarks/llm/src/benchmark/report.py b/benchmarks/llm/src/benchmark/report.py index 31c40178..c3dfaaae 100644 --- a/benchmarks/llm/src/benchmark/report.py +++ b/benchmarks/llm/src/benchmark/report.py @@ -1016,6 +1016,25 @@ def _avg_f1(stats: ModelStats) -> float: return stats.avg_f1 +_CATEGORICAL_CMAPS = ("tab20", "tab20b", "tab20c") # 20 colors each = 60 total + + +def _distinct_colors(n: int) -> list[tuple[float, ...]]: + """Return n visually distinct RGBA tuples, never repeating. + + Concatenating the tab20 family gives 60 categorical colors with good + perceptual contrast. Past that, fall back to evenly-spaced hsv samples + so each model still gets a unique color, accepting weaker contrast.""" + import matplotlib.pyplot as plt + palette: list[tuple[float, ...]] = [] + for name in _CATEGORICAL_CMAPS: + palette.extend(plt.get_cmap(name).colors) + if n <= len(palette): + return palette[:n] + cmap = plt.get_cmap("hsv") + return [cmap(i / n) for i in range(n)] + + def _render_pareto(stats: dict[str, ModelStats], path: Path) -> None: """Distinct color per model, legend rendered as a real matplotlib legend below the plot so each model's color sits next to its name.""" @@ -1027,13 +1046,12 @@ def _render_pareto(stats: dict[str, ModelStats], path: Path) -> None: points = [(s, f1) for s, f1 in points if not (f1 == 0 and s.total_episode_cost == 0)] points.sort(key=lambda t: (-t[1], t[0].total_episode_cost)) # rank by F1 desc, then cost asc - cmap = plt.get_cmap("tab20") + colors = _distinct_colors(len(points)) fig, ax = plt.subplots(figsize=(11, 9)) for i, (s, f1) in enumerate(points): - color = cmap(i % 20) ax.scatter( s.total_episode_cost, f1, - s=180, color=color, + s=180, color=colors[i], edgecolors="black", linewidths=0.7, zorder=3, label=f"{s.model} (F1 {f1:.3f}, ${s.total_episode_cost:.4f}/ep)", ) @@ -1683,7 +1701,7 @@ def _render_precision_recall_chart(stats: dict[str, ModelStats], path: Path) -> return points.sort(key=lambda t: -(2 * t[1] * t[2] / (t[1] + t[2]) if (t[1] + t[2]) > 0 else 0)) # F1 desc - cmap = plt.get_cmap("tab20") + colors = _distinct_colors(len(points)) fig, ax = plt.subplots(figsize=(11, 9)) # F1 isocurves: for each target F1, plot the curve precision*recall*2 / (p+r) = F1 @@ -1699,7 +1717,7 @@ def _render_precision_recall_chart(stats: dict[str, ModelStats], path: Path) -> for i, (s, p, r) in enumerate(points): f1 = 2 * p * r / (p + r) if (p + r) > 0 else 0 - ax.scatter(r, p, s=180, color=cmap(i % 20), + ax.scatter(r, p, s=180, color=colors[i], edgecolors="black", linewidths=0.7, zorder=3, label=f"{s.model} (P {p:.2f}, R {r:.2f}, F1 {f1:.2f})") @@ -1775,10 +1793,10 @@ def _render_token_efficiency_chart(stats: dict[str, ModelStats], path: Path) -> return points.sort(key=lambda t: -t[1]) - cmap = plt.get_cmap("tab20") + colors = _distinct_colors(len(points)) fig, ax = plt.subplots(figsize=(11, 8)) for i, (s, f1) in enumerate(points): - ax.scatter(s.tokens_per_detected_ad, f1, s=180, color=cmap(i % 20), + ax.scatter(s.tokens_per_detected_ad, f1, s=180, color=colors[i], edgecolors="black", linewidths=0.7, zorder=3, label=f"{s.model} (F1 {f1:.2f}, {s.tokens_per_detected_ad:.0f} tok/ad)") ax.set_xscale("log")