diff --git a/gui/public/logos/openrouter.png b/gui/public/logos/openrouter.png new file mode 100644 index 00000000000..04f998a53af Binary files /dev/null and b/gui/public/logos/openrouter.png differ diff --git a/gui/src/components/modelSelection/ModelSelectionListbox.tsx b/gui/src/components/modelSelection/ModelSelectionListbox.tsx index 04510b6bb33..87ba2822c56 100644 --- a/gui/src/components/modelSelection/ModelSelectionListbox.tsx +++ b/gui/src/components/modelSelection/ModelSelectionListbox.tsx @@ -2,8 +2,9 @@ import { CheckIcon, ChevronUpDownIcon, CubeIcon, + MagnifyingGlassIcon, } from "@heroicons/react/24/outline"; -import { Fragment } from "react"; +import { Fragment, useEffect, useMemo, useState } from "react"; import { Listbox, ListboxButton, @@ -19,6 +20,34 @@ interface ModelSelectionListboxProps { setSelectedProvider: (val: DisplayInfo) => void; topOptions?: DisplayInfo[]; otherOptions?: DisplayInfo[]; + searchPlaceholder?: string; +} + +/** + * Simple fuzzy search algorithm + * Returns a score based on how well the query matches the text + */ +function fuzzyScore(query: string, text: string): number { + const q = query.toLowerCase(); + const t = text.toLowerCase(); + + if (!q) return 1; // Empty query matches everything + if (!t) return 0; + + let score = 0; + let queryIdx = 0; + let lastMatchIdx = -1; + + for (let i = 0; i < t.length && queryIdx < q.length; i++) { + if (t[i] === q[queryIdx]) { + score += 1 + (lastMatchIdx === i - 1 ? 5 : 0); // Bonus for consecutive matches + lastMatchIdx = i; + queryIdx++; + } + } + + // Return 0 if not all query characters were found + return queryIdx === q.length ? score / t.length : 0; } function ModelSelectionListbox({ @@ -26,9 +55,51 @@ function ModelSelectionListbox({ setSelectedProvider, topOptions = [], otherOptions = [], + searchPlaceholder = "Search models...", }: ModelSelectionListboxProps) { + const [searchQuery, setSearchQuery] = useState(""); + + // Clear search query when provider changes + useEffect(() => { + setSearchQuery(""); + }, [selectedProvider]); + + // Combine and filter options based on fuzzy search + const filteredTopOptions = useMemo(() => { + if (!searchQuery) return topOptions; + return topOptions + .map((opt) => ({ + option: opt, + score: fuzzyScore(searchQuery, opt.title), + })) + .filter(({ score }) => score > 0) + .sort((a, b) => b.score - a.score) + .map(({ option }) => option); + }, [searchQuery, topOptions]); + + const filteredOtherOptions = useMemo(() => { + if (!searchQuery) return otherOptions; + return otherOptions + .map((opt) => ({ + option: opt, + score: fuzzyScore(searchQuery, opt.title), + })) + .filter(({ score }) => score > 0) + .sort((a, b) => b.score - a.score) + .map(({ option }) => option); + }, [searchQuery, otherOptions]); + + const hasResults = + filteredTopOptions.length > 0 || filteredOtherOptions.length > 0; + return ( - + { + setSelectedProvider(value); + setSearchQuery(""); + }} + >
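For reference, here is a minimal standalone sketch of how the fuzzyScore heuristic above ranks option titles. The scoring function is copied from the diff; the rank helper and the model titles are illustrative assumptions, not part of the change.

// Standalone sketch (TypeScript). fuzzyScore is taken verbatim from the diff above.
function fuzzyScore(query: string, text: string): number {
  const q = query.toLowerCase();
  const t = text.toLowerCase();

  if (!q) return 1; // Empty query matches everything
  if (!t) return 0;

  let score = 0;
  let queryIdx = 0;
  let lastMatchIdx = -1;

  for (let i = 0; i < t.length && queryIdx < q.length; i++) {
    if (t[i] === q[queryIdx]) {
      score += 1 + (lastMatchIdx === i - 1 ? 5 : 0); // Bonus for consecutive matches
      lastMatchIdx = i;
      queryIdx++;
    }
  }

  // Normalized by text length, so shorter titles with the same hits rank higher;
  // 0 means the query characters were not all found in order
  return queryIdx === q.length ? score / t.length : 0;
}

// Illustrative helper that ranks titles the same way the component ranks options
function rank(query: string, titles: string[]): string[] {
  return titles
    .map((title) => ({ title, score: fuzzyScore(query, title) }))
    .filter(({ score }) => score > 0)
    .sort((a, b) => b.score - a.score)
    .map(({ title }) => title);
}

// "gpt4" matches "GPT-4.1" ahead of "GPT-4.1 Nano" (same matched characters,
// shorter text), and "Claude Sonnet 4" is dropped because not every query
// character is found in order.
console.log(rank("gpt4", ["GPT-4.1 Nano", "GPT-4.1", "Claude Sonnet 4"]));
// => [ "GPT-4.1", "GPT-4.1 Nano" ]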
@@ -54,87 +125,120 @@ function ModelSelectionListbox({ leaveFrom="opacity-100" leaveTo="opacity-0" > - - {topOptions.length > 0 && ( -
-
- Popular -
- {topOptions.map((option, index) => ( - - ` ${selected ? "bg-list-active" : "bg-input"} hover:bg-list-active hover:text-list-active-foreground relative flex cursor-default cursor-pointer select-none items-center justify-between gap-2 p-1.5 px-3 py-2 pr-4` - } - value={option} - > - {({ selected }) => ( - <> -
- {option.title === "Autodetect" ? ( - - ) : ( - window.vscMediaUrl && - option.icon && ( - - ) - )} - {option.title} -
- {selected && ( -
- ))} + + {/* Search Box */} +
+
+ + setSearchQuery(e.target.value)} + className="bg-background text-foreground placeholder-description-muted w-full border-0 px-2 py-1.5 outline-none" + onClick={(e) => e.stopPropagation()} + />
- )} - {topOptions.length > 0 && otherOptions.length > 0 && ( -
- )} - {otherOptions.length > 0 && ( -
-
- Additional providers +
+ + {/* Results */} +
+ {!hasResults ? ( +
+ No models found matching "{searchQuery}"
- {otherOptions.map((option, index) => ( - - ` ${selected ? "bg-list-active" : "bg-input"} hover:bg-list-active hover:text-list-active-foreground relative flex cursor-default cursor-pointer select-none items-center justify-between gap-2 p-1.5 px-3 py-2 pr-4` - } - value={option} - > - {({ selected }) => ( - <> -
- {option.title === "Autodetect" ? ( - - ) : ( - window.vscMediaUrl && - option.icon && ( - - ) + ) : ( + <> + {filteredTopOptions.length > 0 && ( +
+
+ Popular +
+ {filteredTopOptions.map((option, index) => ( + + ` ${selected ? "bg-list-active" : "bg-input"} hover:bg-list-active hover:text-list-active-foreground relative flex cursor-pointer select-none items-center justify-between gap-2 p-1.5 px-3 py-2 pr-4` + } + value={option} + > + {({ selected }) => ( + <> +
+ {option.title === "Autodetect" ? ( + + ) : ( + window.vscMediaUrl && + option.icon && ( + + ) + )} + {option.title} +
+ {selected && ( +
- - {selected && ( -
+ )} + {filteredTopOptions.length > 0 && + filteredOtherOptions.length > 0 && ( +
)} - - ))} -
- )} + {filteredOtherOptions.length > 0 && ( +
+
+ Additional providers +
+ {filteredOtherOptions.map((option, index) => ( + + ` ${selected ? "bg-list-active" : "bg-input"} hover:bg-list-active hover:text-list-active-foreground relative flex cursor-pointer select-none items-center justify-between gap-2 p-1.5 px-3 py-2 pr-4` + } + value={option} + > + {({ selected }) => ( + <> +
+ {option.title === "Autodetect" ? ( + + ) : ( + window.vscMediaUrl && + option.icon && ( + + ) + )} + {option.title} +
+ + {selected && ( +
+ ))} +
+ )} + + )} +
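As a usage sketch of the updated listbox, here is how a parent component might wire it up. Only the prop names (selectedProvider, setSelectedProvider, topOptions, otherOptions, searchPlaceholder) come from the diff; the ProviderPicker wrapper, the import path, and the DisplayInfo values are assumptions for illustration, and the DisplayInfo shape is assumed to carry at least a title plus an optional icon.

import { useState } from "react";
import ModelSelectionListbox from "./ModelSelectionListbox"; // assumed relative path

// Stand-in for the GUI's DisplayInfo shape; the component only reads title/icon here
interface DisplayInfo {
  title: string;
  icon?: string;
}

// Hypothetical provider entries; real ones come from the provider configs in AddModelForm
const topOptions: DisplayInfo[] = [
  { title: "OpenAI", icon: "openai.png" },
  { title: "Anthropic", icon: "anthropic.png" },
];
const otherOptions: DisplayInfo[] = [{ title: "OpenRouter", icon: "openrouter.png" }];

function ProviderPicker() {
  // The search box state lives inside the listbox; the parent only tracks the selection
  const [selected, setSelected] = useState<DisplayInfo>(topOptions[0]);

  return (
    <ModelSelectionListbox
      selectedProvider={selected}
      setSelectedProvider={setSelected}
      topOptions={topOptions}
      otherOptions={otherOptions}
      searchPlaceholder="Search providers..."
    />
  );
}

export default ProviderPicker;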
diff --git a/gui/src/forms/AddModelForm.tsx b/gui/src/forms/AddModelForm.tsx index 61a7142992e..3d24816c4d0 100644 --- a/gui/src/forms/AddModelForm.tsx +++ b/gui/src/forms/AddModelForm.tsx @@ -47,6 +47,7 @@ export function AddModelForm({ providers["gemini"]?.title || "", providers["azure"]?.title || "", providers["ollama"]?.title || "", + providers["openrouter"]?.title || "", ]; const allProviders = Object.entries(providers) @@ -149,6 +150,7 @@ export function AddModelForm({ }} topOptions={popularProviders} otherOptions={otherProviders} + searchPlaceholder="Search providers..." /> Don't see your provider?{" "} diff --git a/gui/src/pages/AddNewModel/configs/openRouterModel.ts b/gui/src/pages/AddNewModel/configs/openRouterModel.ts new file mode 100644 index 00000000000..2c5ba5b4ac2 --- /dev/null +++ b/gui/src/pages/AddNewModel/configs/openRouterModel.ts @@ -0,0 +1,74 @@ +import { ModelPackage } from "./models"; +import openRouterModelsData from "./openRouterModels.json"; + +interface OpenRouterModel { + id: string; + name: string; + description: string; + context_length: number; + hugging_face_id: string; +} + +/** + * Convert OpenRouter model data to ModelPackage format + */ +function convertOpenRouterModelToPackage(model: OpenRouterModel): ModelPackage { + // Extract provider name from id (e.g., "openai/gpt-5.1" -> "openai") + const [provider] = model.id.split("/"); + + return { + title: model.name, + description: model.description, + refUrl: `https://openrouter.ai/models/${model.id}`, + params: { + model: model.id, + contextLength: model.context_length, + }, + isOpenSource: !!model.hugging_face_id, + tags: [provider as any], + }; +} + +/** + * Generate ModelPackage objects from OpenRouter models JSON + */ +export function generateOpenRouterModels(): { + [key: string]: ModelPackage; +} { + const models: { [key: string]: ModelPackage } = {}; + + const data = openRouterModelsData as { data: OpenRouterModel[] }; + + if (!data.data || !Array.isArray(data.data)) { + console.warn("Invalid OpenRouter models data structure"); + return models; + } + + data.data.forEach((model: OpenRouterModel) => { + if (!model.id || !model.name) { + console.warn("Skipping model with missing id or name", model); + return; + } + + // Create a unique key from the model id (replace slashes and dots with underscores) + const key = model.id.replace(/[\/.]/g, "_"); + + try { + models[key] = convertOpenRouterModelToPackage(model); + } catch (error) { + console.error(`Failed to convert model ${model.id}:`, error); + } + }); + + return models; +} + +/** + * Export all OpenRouter models as a pre-generated object + */ +export const openRouterModels = generateOpenRouterModels(); + +/** + * Export OpenRouter models as an array for use in provider packages + */ +export const openRouterModelsList = Object.values(openRouterModels); diff --git a/gui/src/pages/AddNewModel/configs/openRouterModels.json b/gui/src/pages/AddNewModel/configs/openRouterModels.json new file mode 100644 index 00000000000..e4c6fe0e63e --- /dev/null +++ b/gui/src/pages/AddNewModel/configs/openRouterModels.json @@ -0,0 +1,2412 @@ +{ + "data": [ + { + "id": "openai/gpt-5.1", + "name": "OpenAI: GPT-5.1", + "description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5.1-chat", + "name": "OpenAI: GPT-5.1 Chat", + "description": 
"GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5.1-codex", + "name": "OpenAI: GPT-5.1-Codex", + "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5.1-codex-mini", + "name": "OpenAI: GPT-5.1-Codex-Mini", + "description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "kwaipilot/kat-coder-pro:free", + "name": "Kwaipilot: KAT-Coder-Pro V1 (free)", + "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "moonshotai/kimi-linear-48b-a3b-instruct", + "name": "MoonshotAI: Kimi Linear 48B A3B Instruct", + "description": "Kimi Linear is a hybrid linear attention architecture that outperforms traditional full attention methods across various contexts, including short, long, and reinforcement learning (RL) scaling.", + "context_length": 1048576, + "hugging_face_id": "moonshotai/Kimi-Linear-48B-A3B-Instruct" + }, + { + "id": "moonshotai/kimi-k2-thinking", + "name": "MoonshotAI: Kimi K2 Thinking", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning.", + "context_length": 262144, + "hugging_face_id": "moonshotai/Kimi-K2-Thinking" + }, + { + "id": "amazon/nova-premier-v1", + "name": "Amazon: Nova Premier 1.0", + "description": "Amazon Nova Premier is the most capable of Amazon’s multimodal models for complex reasoning tasks and for use as the best teacher for distilling custom models.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "perplexity/sonar-pro-search", + "name": "Perplexity: Sonar Pro Search", + "description": "Exclusively available on the OpenRouter API, Sonar Pro's new Pro Search mode is Perplexity's most advanced agentic search system.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/voxtral-small-24b-2507", + "name": "Mistral: Voxtral Small 24B 2507", + "description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance.", + "context_length": 32000, + "hugging_face_id": "mistralai/Voxtral-Small-24B-2507" + }, + { + "id": "openai/gpt-oss-safeguard-20b", + "name": "OpenAI: gpt-oss-safeguard-20b", + "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-safeguard-20b" + }, + { + "id": "nvidia/nemotron-nano-12b-v2-vl:free", + "name": "NVIDIA: Nemotron Nano 12B 2 VL (free)", + "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence.", + "context_length": 128000, + "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16" + }, + { + "id": "nvidia/nemotron-nano-12b-v2-vl", + "name": "NVIDIA: Nemotron Nano 12B 2 VL", + "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document 
intelligence.", + "context_length": 131072, + "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16" + }, + { + "id": "minimax/minimax-m2", + "name": "MiniMax: MiniMax M2", + "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows.", + "context_length": 204800, + "hugging_face_id": "MiniMaxAI/MiniMax-M2" + }, + { + "id": "liquid/lfm2-8b-a1b", + "name": "LiquidAI/LFM2-8B-A1B", + "description": "Model created via inbox interface.", + "context_length": 32768, + "hugging_face_id": "LiquidAI/LFM2-8B-A1B" + }, + { + "id": "liquid/lfm-2.2-6b", + "name": "LiquidAI/LFM2-2.6B", + "description": "LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment.", + "context_length": 32768, + "hugging_face_id": "LiquidAI/LFM2-2.6B" + }, + { + "id": "ibm-granite/granite-4.0-h-micro", + "name": "IBM: Granite 4.0 Micro", + "description": "Granite-4.0-H-Micro is a 3B parameter from the Granite 4 family of models.", + "context_length": 131000, + "hugging_face_id": "ibm-granite/granite-4.0-h-micro" + }, + { + "id": "deepcogito/cogito-v2-preview-llama-405b", + "name": "Deep Cogito: Cogito V2 Preview Llama 405B", + "description": "Cogito v2 405B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection.", + "context_length": 32768, + "hugging_face_id": "deepcogito/cogito-v2-preview-llama-405B" + }, + { + "id": "openai/gpt-5-image-mini", + "name": "OpenAI: GPT-5 Image Mini", + "description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-haiku-4.5", + "name": "Anthropic: Claude Haiku 4.5", + "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-vl-8b-thinking", + "name": "Qwen: Qwen3 VL 8B Thinking", + "description": "Qwen3-VL-8B-Thinking is the reasoning-optimized variant of the Qwen3-VL-8B multimodal model, designed for advanced visual and textual reasoning across complex scenes, documents, and temporal sequences.", + "context_length": 256000, + "hugging_face_id": "Qwen/Qwen3-VL-8B-Thinking" + }, + { + "id": "qwen/qwen3-vl-8b-instruct", + "name": "Qwen: Qwen3 VL 8B Instruct", + "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video.", + "context_length": 131072, + "hugging_face_id": "Qwen/Qwen3-VL-8B-Instruct" + }, + { + "id": "openai/gpt-5-image", + "name": "OpenAI: GPT-5 Image", + "description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's most advanced language model with state-of-the-art image generation capabilities.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "inclusionai/ring-1t", + "name": "inclusionAI: Ring 1T", + "description": "Ring-1T has undergone continued scaling with large-scale verifiable reward reinforcement learning (RLVR) training, further unlocking the natural language reasoning capabilities of the.", + "context_length": 131072, + "hugging_face_id": "inclusionAI/Ring-1T" + }, + { + 
"id": "inclusionai/ling-1t", + "name": "inclusionAI: Ling-1T", + "description": "Ling-1T is a trillion-parameter open-weight large language model developed by inclusionAI and released under the MIT license.", + "context_length": 131072, + "hugging_face_id": "inclusionAI/Ling-1T" + }, + { + "id": "openai/o3-deep-research", + "name": "OpenAI: o3 Deep Research", + "description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "openai/o4-mini-deep-research", + "name": "OpenAI: o4 Mini Deep Research", + "description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", + "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context.", + "context_length": 131072, + "hugging_face_id": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5" + }, + { + "id": "baidu/ernie-4.5-21b-a3b-thinking", + "name": "Baidu: ERNIE 4.5 21B A3B Thinking", + "description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation,.", + "context_length": 131072, + "hugging_face_id": "baidu/ERNIE-4.5-21B-A3B-Thinking" + }, + { + "id": "google/gemini-2.5-flash-image", + "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", + "description": "Gemini 2.5 Flash Image, a.k.a.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-vl-30b-a3b-thinking", + "name": "Qwen: Qwen3 VL 30B A3B Thinking", + "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos.", + "context_length": 131072, + "hugging_face_id": "Qwen/Qwen3-VL-30B-A3B-Thinking" + }, + { + "id": "qwen/qwen3-vl-30b-a3b-instruct", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-VL-30B-A3B-Instruct" + }, + { + "id": "openai/gpt-5-pro", + "name": "OpenAI: GPT-5 Pro", + "description": "GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "z-ai/glm-4.6", + "name": "Z.AI: GLM 4.6", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex.", + "context_length": 202752, + "hugging_face_id": "" + }, + { + "id": "z-ai/glm-4.6:exacto", + "name": "Z.AI: GLM 4.6 (exacto)", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex.", + "context_length": 202752, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-sonnet-4.5", + "name": "Anthropic: Claude Sonnet 4.5", + 
"description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-v3.2-exp", + "name": "DeepSeek: DeepSeek V3.2 Exp", + "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.2-Exp" + }, + { + "id": "thedrummer/cydonia-24b-v4.1", + "name": "TheDrummer: Cydonia 24B V4.1", + "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", + "context_length": 131072, + "hugging_face_id": "thedrummer/cydonia-24b-v4.1" + }, + { + "id": "relace/relace-apply-3", + "name": "Relace: Relace Apply 3", + "description": "Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-flash-preview-09-2025", + "name": "Google: Gemini 2.5 Flash Preview 09-2025", + "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-flash-lite-preview-09-2025", + "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-vl-235b-a22b-thinking", + "name": "Qwen: Qwen3 VL 235B A22B Thinking", + "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-VL-235B-A22B-Thinking" + }, + { + "id": "qwen/qwen3-vl-235b-a22b-instruct", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-VL-235B-A22B-Instruct" + }, + { + "id": "qwen/qwen3-max", + "name": "Qwen: Qwen3 Max", + "description": "Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-coder-plus", + "name": "Qwen: Qwen3 Coder Plus", + "description": "Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5-codex", + "name": "OpenAI: GPT-5 Codex", + "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-v3.1-terminus", + "name": "DeepSeek: DeepSeek V3.1 Terminus", + "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's 
original capabilities while addressing issues reported by users, including language.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.1-Terminus" + }, + { + "id": "deepseek/deepseek-v3.1-terminus:exacto", + "name": "DeepSeek: DeepSeek V3.1 Terminus (exacto)", + "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language.", + "context_length": 131072, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.1-Terminus" + }, + { + "id": "x-ai/grok-4-fast", + "name": "xAI: Grok 4 Fast", + "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window.", + "context_length": 2000000, + "hugging_face_id": "" + }, + { + "id": "alibaba/tongyi-deepresearch-30b-a3b:free", + "name": "Tongyi DeepResearch 30B A3B (free)", + "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token.", + "context_length": 131072, + "hugging_face_id": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B" + }, + { + "id": "alibaba/tongyi-deepresearch-30b-a3b", + "name": "Tongyi DeepResearch 30B A3B", + "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token.", + "context_length": 131072, + "hugging_face_id": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B" + }, + { + "id": "qwen/qwen3-coder-flash", + "name": "Qwen: Qwen3 Coder Flash", + "description": "Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/afm-4.5b", + "name": "Arcee AI: AFM 4.5B", + "description": "AFM-4.5B is a 4.5 billion parameter instruction-tuned language model developed by Arcee AI.", + "context_length": 65536, + "hugging_face_id": "arcee-ai/AFM-4.5B" + }, + { + "id": "opengvlab/internvl3-78b", + "name": "OpenGVLab: InternVL3 78B", + "description": "The InternVL3 series is an advanced multimodal large language model (MLLM).", + "context_length": 32768, + "hugging_face_id": "OpenGVLab/InternVL3-78B" + }, + { + "id": "qwen/qwen3-next-80b-a3b-thinking", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Next-80B-A3B-Thinking" + }, + { + "id": "qwen/qwen3-next-80b-a3b-instruct", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Next-80B-A3B-Instruct" + }, + { + "id": "meituan/longcat-flash-chat:free", + "name": "Meituan: LongCat Flash Chat (free)", + "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts (MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input.", + "context_length": 131072, + "hugging_face_id": "meituan-longcat/LongCat-Flash-Chat" + }, + { + "id": "meituan/longcat-flash-chat", + "name": "Meituan: LongCat Flash Chat", + "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts 
(MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input.", + "context_length": 131072, + "hugging_face_id": "meituan-longcat/LongCat-Flash-Chat" + }, + { + "id": "qwen/qwen-plus-2025-07-28", + "name": "Qwen: Qwen Plus 0728", + "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-plus-2025-07-28:thinking", + "name": "Qwen: Qwen Plus 0728 (thinking)", + "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "nvidia/nemotron-nano-9b-v2:free", + "name": "NVIDIA: Nemotron Nano 9B V2 (free)", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks.", + "context_length": 128000, + "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2" + }, + { + "id": "nvidia/nemotron-nano-9b-v2", + "name": "NVIDIA: Nemotron Nano 9B V2", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks.", + "context_length": 131072, + "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2" + }, + { + "id": "moonshotai/kimi-k2-0905", + "name": "MoonshotAI: Kimi K2 0905", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2).", + "context_length": 262144, + "hugging_face_id": "moonshotai/Kimi-K2-Instruct-0905" + }, + { + "id": "moonshotai/kimi-k2-0905:exacto", + "name": "MoonshotAI: Kimi K2 0905 (exacto)", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2).", + "context_length": 262144, + "hugging_face_id": "moonshotai/Kimi-K2-Instruct-0905" + }, + { + "id": "deepcogito/cogito-v2-preview-llama-70b", + "name": "Deep Cogito: Cogito V2 Preview Llama 70B", + "description": "Cogito v2 70B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection.", + "context_length": 32768, + "hugging_face_id": "deepcogito/cogito-v2-preview-llama-70B" + }, + { + "id": "deepcogito/cogito-v2-preview-llama-109b-moe", + "name": "Cogito V2 Preview Llama 109B", + "description": "An instruction-tuned, hybrid-reasoning Mixture-of-Experts model built on Llama-4-Scout-17B-16E.", + "context_length": 32767, + "hugging_face_id": "deepcogito/cogito-v2-preview-llama-109B-MoE" + }, + { + "id": "deepcogito/cogito-v2-preview-deepseek-671b", + "name": "Deep Cogito: Cogito V2 Preview Deepseek 671B", + "description": "Cogito v2 is a multilingual, instruction-tuned Mixture of Experts (MoE) large language model with 671 billion parameters.", + "context_length": 163840, + "hugging_face_id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE" + }, + { + "id": "stepfun-ai/step3", + "name": "StepFun: Step3", + "description": "Step3 is a cutting-edge multimodal reasoning model—built on a Mixture-of-Experts architecture with 321B total parameters and 38B active.", + "context_length": 65536, + "hugging_face_id": "stepfun-ai/step3" + }, + { + "id": "qwen/qwen3-30b-a3b-thinking-2507", + "name": "Qwen: Qwen3 30B A3B Thinking 2507", + "description": 
"Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-30B-A3B-Thinking-2507" + }, + { + "id": "x-ai/grok-code-fast-1", + "name": "xAI: Grok Code Fast 1", + "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "nousresearch/hermes-4-70b", + "name": "Nous: Hermes 4 70B", + "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B.", + "context_length": 131072, + "hugging_face_id": "NousResearch/Hermes-4-70B" + }, + { + "id": "nousresearch/hermes-4-405b", + "name": "Nous: Hermes 4 405B", + "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research.", + "context_length": 131072, + "hugging_face_id": "NousResearch/Hermes-4-405B" + }, + { + "id": "google/gemini-2.5-flash-image-preview", + "name": "Google: Gemini 2.5 Flash Image Preview (Nano Banana)", + "description": "Gemini 2.5 Flash Image Preview, a.k.a.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-chat-v3.1:free", + "name": "DeepSeek: DeepSeek V3.1 (free)", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates.", + "context_length": 163800, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.1" + }, + { + "id": "deepseek/deepseek-chat-v3.1", + "name": "DeepSeek: DeepSeek V3.1", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.1" + }, + { + "id": "openai/gpt-4o-audio-preview", + "name": "OpenAI: GPT-4o Audio", + "description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-medium-3.1", + "name": "Mistral: Mistral Medium 3.1", + "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "baidu/ernie-4.5-21b-a3b", + "name": "Baidu: ERNIE 4.5 21B A3B", + "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through.", + "context_length": 120000, + "hugging_face_id": "baidu/ERNIE-4.5-21B-A3B-PT" + }, + { + "id": "baidu/ernie-4.5-vl-28b-a3b", + "name": "Baidu: ERNIE 4.5 VL 28B A3B", + "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative.", + "context_length": 30000, + "hugging_face_id": "baidu/ERNIE-4.5-VL-28B-A3B-PT" + }, + { + "id": "z-ai/glm-4.5v", + "name": "Z.AI: GLM 4.5V", + "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications.", + "context_length": 65536, + "hugging_face_id": "zai-org/GLM-4.5V" + }, + { + "id": "ai21/jamba-mini-1.7", + "name": "AI21: 
Jamba Mini 1.7", + "description": "Jamba Mini 1.7 is a compact and efficient member of the Jamba open model family, incorporating key improvements in grounding and instruction-following while maintaining the benefits of the.", + "context_length": 256000, + "hugging_face_id": "ai21labs/AI21-Jamba-Mini-1.7" + }, + { + "id": "ai21/jamba-large-1.7", + "name": "AI21: Jamba Large 1.7", + "description": "Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency.", + "context_length": 256000, + "hugging_face_id": "ai21labs/AI21-Jamba-Large-1.7" + }, + { + "id": "openai/gpt-5-chat", + "name": "OpenAI: GPT-5 Chat", + "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5", + "name": "OpenAI: GPT-5", + "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5-mini", + "name": "OpenAI: GPT-5 Mini", + "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5-nano", + "name": "OpenAI: GPT-5 Nano", + "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-oss-120b", + "name": "OpenAI: gpt-oss-120b", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-120b" + }, + { + "id": "openai/gpt-oss-120b:exacto", + "name": "OpenAI: gpt-oss-120b (exacto)", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-120b" + }, + { + "id": "openai/gpt-oss-20b:free", + "name": "OpenAI: gpt-oss-20b (free)", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-20b" + }, + { + "id": "openai/gpt-oss-20b", + "name": "OpenAI: gpt-oss-20b", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-20b" + }, + { + "id": "anthropic/claude-opus-4.1", + "name": "Anthropic: Claude Opus 4.1", + "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/codestral-2508", + "name": "Mistral: Codestral 2508", + "description": "Mistral's cutting-edge language model for coding released end of July 2025.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-coder-30b-a3b-instruct", + "name": "Qwen: Qwen3 Coder 30B A3B Instruct", + "description": 
"Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding,.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct" + }, + { + "id": "qwen/qwen3-30b-a3b-instruct-2507", + "name": "Qwen: Qwen3 30B A3B Instruct 2507", + "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-30B-A3B-Instruct-2507" + }, + { + "id": "z-ai/glm-4.5", + "name": "Z.AI: GLM 4.5", + "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications.", + "context_length": 131072, + "hugging_face_id": "zai-org/GLM-4.5" + }, + { + "id": "z-ai/glm-4.5-air:free", + "name": "Z.AI: GLM 4.5 Air (free)", + "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications.", + "context_length": 131072, + "hugging_face_id": "zai-org/GLM-4.5-Air" + }, + { + "id": "z-ai/glm-4.5-air", + "name": "Z.AI: GLM 4.5 Air", + "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications.", + "context_length": 131072, + "hugging_face_id": "zai-org/GLM-4.5-Air" + }, + { + "id": "qwen/qwen3-235b-a22b-thinking-2507", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-235B-A22B-Thinking-2507" + }, + { + "id": "z-ai/glm-4-32b", + "name": "Z.AI: GLM 4 32B ", + "description": "GLM 4 32B is a cost-effective foundation language model.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-coder:free", + "name": "Qwen: Qwen3 Coder 480B A35B (free)", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team.", + "context_length": 262000, + "hugging_face_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct" + }, + { + "id": "qwen/qwen3-coder", + "name": "Qwen: Qwen3 Coder 480B A35B", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct" + }, + { + "id": "qwen/qwen3-coder:exacto", + "name": "Qwen: Qwen3 Coder 480B A35B (exacto)", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct" + }, + { + "id": "bytedance/ui-tars-1.5-7b", + "name": "ByteDance: UI-TARS 7B ", + "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games.", + "context_length": 128000, + "hugging_face_id": "ByteDance-Seed/UI-TARS-1.5-7B" + }, + { + "id": "google/gemini-2.5-flash-lite", + "name": "Google: Gemini 2.5 Flash Lite", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency.", + "context_length": 1048576, + "hugging_face_id": 
"" + }, + { + "id": "qwen/qwen3-235b-a22b-2507", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-235B-A22B-Instruct-2507" + }, + { + "id": "switchpoint/router", + "name": "Switchpoint Router", + "description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "moonshotai/kimi-k2:free", + "name": "MoonshotAI: Kimi K2 0711 (free)", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass.", + "context_length": 32768, + "hugging_face_id": "moonshotai/Kimi-K2-Instruct" + }, + { + "id": "moonshotai/kimi-k2", + "name": "MoonshotAI: Kimi K2 0711", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass.", + "context_length": 131072, + "hugging_face_id": "moonshotai/Kimi-K2-Instruct" + }, + { + "id": "thudm/glm-4.1v-9b-thinking", + "name": "THUDM: GLM 4.1V 9B Thinking", + "description": "GLM-4.1V-9B-Thinking is a 9B parameter vision-language model developed by THUDM, based on the GLM-4-9B foundation.", + "context_length": 65536, + "hugging_face_id": "THUDM/GLM-4.1V-9B-Thinking" + }, + { + "id": "mistralai/devstral-medium", + "name": "Mistral: Devstral Medium", + "description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/devstral-small", + "name": "Mistral: Devstral Small 1.1", + "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI.", + "context_length": 128000, + "hugging_face_id": "mistralai/Devstral-Small-2507" + }, + { + "id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free", + "name": "Venice: Uncensored (free)", + "description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai.", + "context_length": 32768, + "hugging_face_id": "cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition" + }, + { + "id": "x-ai/grok-4", + "name": "xAI: Grok 4", + "description": "Grok 4 is xAI's latest reasoning model with a 256k context window.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "google/gemma-3n-e2b-it:free", + "name": "Google: Gemma 3n 2B (free)", + "description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture.", + "context_length": 8192, + "hugging_face_id": "google/gemma-3n-E2B-it" + }, + { + "id": "tencent/hunyuan-a13b-instruct", + "name": "Tencent: Hunyuan A13B Instruct", + "description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for 
reasoning via Chain-of-Thought.", + "context_length": 131072, + "hugging_face_id": "tencent/Hunyuan-A13B-Instruct" + }, + { + "id": "tngtech/deepseek-r1t2-chimera:free", + "name": "TNG: DeepSeek R1T2 Chimera (free)", + "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech.", + "context_length": 163840, + "hugging_face_id": "tngtech/DeepSeek-TNG-R1T2-Chimera" + }, + { + "id": "tngtech/deepseek-r1t2-chimera", + "name": "TNG: DeepSeek R1T2 Chimera", + "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech.", + "context_length": 163840, + "hugging_face_id": "tngtech/DeepSeek-TNG-R1T2-Chimera" + }, + { + "id": "morph/morph-v3-large", + "name": "Morph: Morph V3 Large", + "description": "Morph's high-accuracy apply model for complex code edits.", + "context_length": 262144, + "hugging_face_id": "" + }, + { + "id": "morph/morph-v3-fast", + "name": "Morph: Morph V3 Fast", + "description": "Morph's fastest apply model for code edits.", + "context_length": 81920, + "hugging_face_id": "" + }, + { + "id": "baidu/ernie-4.5-vl-424b-a47b", + "name": "Baidu: ERNIE 4.5 VL 424B A47B ", + "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token.", + "context_length": 123000, + "hugging_face_id": "baidu/ERNIE-4.5-VL-424B-A47B-PT" + }, + { + "id": "baidu/ernie-4.5-300b-a47b", + "name": "Baidu: ERNIE 4.5 300B A47B ", + "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series.", + "context_length": 123000, + "hugging_face_id": "baidu/ERNIE-4.5-300B-A47B-PT" + }, + { + "id": "thedrummer/anubis-70b-v1.1", + "name": "TheDrummer: Anubis 70B V1.1", + "description": "TheDrummer's Anubis v1.1 is an unaligned, creative Llama 3.3 70B model focused on providing character-driven roleplay & stories.", + "context_length": 131072, + "hugging_face_id": "TheDrummer/Anubis-70B-v1.1" + }, + { + "id": "inception/mercury", + "name": "Inception: Mercury", + "description": "Mercury is the first diffusion large language model (dLLM).", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-small-3.2-24b-instruct:free", + "name": "Mistral: Mistral Small 3.2 24B (free)", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Small-3.2-24B-Instruct-2506" + }, + { + "id": "mistralai/mistral-small-3.2-24b-instruct", + "name": "Mistral: Mistral Small 3.2 24B", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Small-3.2-24B-Instruct-2506" + }, + { + "id": "minimax/minimax-m1", + "name": "MiniMax: MiniMax M1", + "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-flash-lite-preview-06-17", + "name": "Google: Gemini 2.5 Flash Lite Preview 06-17", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, 
optimized for ultra-low latency and cost efficiency.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-flash", + "name": "Google: Gemini 2.5 Flash", + "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-pro", + "name": "Google: Gemini 2.5 Pro", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "moonshotai/kimi-dev-72b", + "name": "MoonshotAI: Kimi Dev 72B", + "description": "Kimi-Dev-72B is an open-source large language model fine-tuned for software engineering and issue resolution tasks.", + "context_length": 131072, + "hugging_face_id": "moonshotai/Kimi-Dev-72B" + }, + { + "id": "openai/o3-pro", + "name": "OpenAI: o3 Pro", + "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "x-ai/grok-3-mini", + "name": "xAI: Grok 3 Mini", + "description": "A lightweight model that thinks before responding.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "x-ai/grok-3", + "name": "xAI: Grok 3", + "description": "Grok 3 is the latest model from xAI.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/magistral-small-2506", + "name": "Mistral: Magistral Small 2506", + "description": "Magistral Small is a 24B parameter instruction-tuned model based on Mistral-Small-3.1 (2503), enhanced through supervised fine-tuning on traces from Magistral Medium and further refined via.", + "context_length": 40000, + "hugging_face_id": "mistralai/Magistral-Small-2506" + }, + { + "id": "mistralai/magistral-medium-2506:thinking", + "name": "Mistral: Magistral Medium 2506 (thinking)", + "description": "Magistral is Mistral's first reasoning model.", + "context_length": 40960, + "hugging_face_id": "" + }, + { + "id": "mistralai/magistral-medium-2506", + "name": "Mistral: Magistral Medium 2506", + "description": "Magistral is Mistral's first reasoning model.", + "context_length": 40960, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-pro-preview", + "name": "Google: Gemini 2.5 Pro Preview 06-05", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-r1-0528-qwen3-8b:free", + "name": "DeepSeek: DeepSeek R1 0528 Qwen3 8B (free)", + "description": "DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, pushing its reasoning and inference to the brink of flagship models like O3 and.", + "context_length": 131072, + "hugging_face_id": "deepseek-ai/deepseek-r1-0528-qwen3-8b" + }, + { + "id": "deepseek/deepseek-r1-0528-qwen3-8b", + "name": "DeepSeek: DeepSeek R1 0528 Qwen3 8B", + "description": "DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, pushing its reasoning and inference to the brink of flagship models like O3 and.", + "context_length": 32768, + "hugging_face_id": 
"deepseek-ai/deepseek-r1-0528-qwen3-8b" + }, + { + "id": "deepseek/deepseek-r1-0528:free", + "name": "DeepSeek: R1 0528 (free)", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-0528" + }, + { + "id": "deepseek/deepseek-r1-0528", + "name": "DeepSeek: R1 0528", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-0528" + }, + { + "id": "anthropic/claude-opus-4", + "name": "Anthropic: Claude Opus 4", + "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-sonnet-4", + "name": "Anthropic: Claude Sonnet 4", + "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "mistralai/devstral-small-2505", + "name": "Mistral: Devstral Small 2505", + "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks.", + "context_length": 128000, + "hugging_face_id": "mistralai/Devstral-Small-2505" + }, + { + "id": "google/gemma-3n-e4b-it:free", + "name": "Google: Gemma 3n 4B (free)", + "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets.", + "context_length": 8192, + "hugging_face_id": "google/gemma-3n-E4B-it" + }, + { + "id": "google/gemma-3n-e4b-it", + "name": "Google: Gemma 3n 4B", + "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets.", + "context_length": 32768, + "hugging_face_id": "google/gemma-3n-E4B-it" + }, + { + "id": "openai/codex-mini", + "name": "OpenAI: Codex Mini", + "description": "codex-mini-latest is a fine-tuned version of o4-mini specifically for use in Codex CLI.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-3.3-8b-instruct:free", + "name": "Meta: Llama 3.3 8B Instruct (free)", + "description": "A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "nousresearch/deephermes-3-mistral-24b-preview", + "name": "Nous: DeepHermes 3 Mistral 24B Preview", + "description": "DeepHermes 3 (Mistral 24B Preview) is an instruction-tuned language model by Nous Research based on Mistral-Small-24B, designed for chat, function calling, and advanced multi-turn reasoning.", + "context_length": 32768, + "hugging_face_id": "NousResearch/DeepHermes-3-Mistral-24B-Preview" + }, + { + "id": "mistralai/mistral-medium-3", + "name": "Mistral: Mistral Medium 3", + "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at 
significantly reduced operational cost.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-pro-preview-05-06", + "name": "Google: Gemini 2.5 Pro Preview 05-06", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/spotlight", + "name": "Arcee AI: Spotlight", + "description": "Spotlight is a 7‑billion‑parameter vision‑language model derived from Qwen 2.5‑VL and fine‑tuned by Arcee AI for tight image‑text grounding tasks.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/maestro-reasoning", + "name": "Arcee AI: Maestro Reasoning", + "description": "Maestro Reasoning is Arcee's flagship analysis model: a 32 B‑parameter derivative of Qwen 2.5‑32 B tuned with DPO and chain‑of‑thought RL for step‑by‑step logic.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/virtuoso-large", + "name": "Arcee AI: Virtuoso Large", + "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/coder-large", + "name": "Arcee AI: Coder Large", + "description": "Coder‑Large is a 32 B‑parameter offspring of Qwen 2.5‑Instruct that has been further trained on permissively‑licensed GitHub, CodeSearchNet and synthetic bug‑fix corpora.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "microsoft/phi-4-reasoning-plus", + "name": "Microsoft: Phi 4 Reasoning Plus", + "description": "Phi-4-reasoning-plus is an enhanced 14B parameter model from Microsoft, fine-tuned from Phi-4 with additional reinforcement learning to boost accuracy on math, science, and code reasoning tasks.", + "context_length": 32768, + "hugging_face_id": "microsoft/Phi-4-reasoning-plus" + }, + { + "id": "inception/mercury-coder", + "name": "Inception: Mercury Coder", + "description": "Mercury Coder is the first diffusion large language model (dLLM).", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-4b:free", + "name": "Qwen: Qwen3 4B (free)", + "description": "Qwen3-4B is a 4 billion parameter dense language model from the Qwen3 series, designed to support both general-purpose and reasoning-intensive tasks.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-4B" + }, + { + "id": "deepseek/deepseek-prover-v2", + "name": "DeepSeek: DeepSeek Prover V2", + "description": "DeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-Prover-V2-671B" + }, + { + "id": "meta-llama/llama-guard-4-12b", + "name": "Meta: Llama Guard 4 12B", + "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification.", + "context_length": 163840, + "hugging_face_id": "meta-llama/Llama-Guard-4-12B" + }, + { + "id": "qwen/qwen3-30b-a3b:free", + "name": "Qwen: Qwen3 30B A3B (free)", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-30B-A3B" + }, + { + "id": 
"qwen/qwen3-30b-a3b", + "name": "Qwen: Qwen3 30B A3B", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-30B-A3B" + }, + { + "id": "qwen/qwen3-8b", + "name": "Qwen: Qwen3 8B", + "description": "Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue.", + "context_length": 128000, + "hugging_face_id": "Qwen/Qwen3-8B" + }, + { + "id": "qwen/qwen3-14b:free", + "name": "Qwen: Qwen3 14B (free)", + "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-14B" + }, + { + "id": "qwen/qwen3-14b", + "name": "Qwen: Qwen3 14B", + "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-14B" + }, + { + "id": "qwen/qwen3-32b", + "name": "Qwen: Qwen3 32B", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-32B" + }, + { + "id": "qwen/qwen3-235b-a22b:free", + "name": "Qwen: Qwen3 235B A22B (free)", + "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-235B-A22B" + }, + { + "id": "qwen/qwen3-235b-a22b", + "name": "Qwen: Qwen3 235B A22B", + "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-235B-A22B" + }, + { + "id": "tngtech/deepseek-r1t-chimera:free", + "name": "TNG: DeepSeek R1T Chimera (free)", + "description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3.", + "context_length": 163840, + "hugging_face_id": "tngtech/DeepSeek-R1T-Chimera" + }, + { + "id": "tngtech/deepseek-r1t-chimera", + "name": "TNG: DeepSeek R1T Chimera", + "description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3.", + "context_length": 163840, + "hugging_face_id": "tngtech/DeepSeek-R1T-Chimera" + }, + { + "id": "microsoft/mai-ds-r1:free", + "name": "Microsoft: MAI DS R1 (free)", + "description": "MAI-DS-R1 is a post-trained variant of DeepSeek-R1 developed by the Microsoft AI team to improve the model’s responsiveness on previously blocked topics while enhancing its safety profile.", + "context_length": 163840, + "hugging_face_id": "microsoft/MAI-DS-R1" + }, + { + "id": "microsoft/mai-ds-r1", + "name": "Microsoft: MAI DS R1", + "description": "MAI-DS-R1 is a post-trained variant of DeepSeek-R1 developed by the Microsoft AI team to improve the model’s responsiveness on previously blocked topics while enhancing its safety profile.", + "context_length": 163840, + "hugging_face_id": 
"microsoft/MAI-DS-R1" + }, + { + "id": "openai/o4-mini-high", + "name": "OpenAI: o4 Mini High", + "description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "openai/o3", + "name": "OpenAI: o3", + "description": "o3 is a well-rounded and powerful model across domains.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "openai/o4-mini", + "name": "OpenAI: o4 Mini", + "description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen2.5-coder-7b-instruct", + "name": "Qwen: Qwen2.5 Coder 7B Instruct", + "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-Coder-7B-Instruct" + }, + { + "id": "openai/gpt-4.1", + "name": "OpenAI: GPT-4.1", + "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning.", + "context_length": 1047576, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4.1-mini", + "name": "OpenAI: GPT-4.1 Mini", + "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost.", + "context_length": 1047576, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4.1-nano", + "name": "OpenAI: GPT-4.1 Nano", + "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series.", + "context_length": 1047576, + "hugging_face_id": "" + }, + { + "id": "eleutherai/llemma_7b", + "name": "EleutherAI: Llemma 7b", + "description": "Llemma 7B is a language model for mathematics.", + "context_length": 4096, + "hugging_face_id": "EleutherAI/llemma_7b" + }, + { + "id": "alfredpros/codellama-7b-instruct-solidity", + "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", + "description": "A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.", + "context_length": 4096, + "hugging_face_id": "AlfredPros/CodeLlama-7b-Instruct-Solidity" + }, + { + "id": "arliai/qwq-32b-arliai-rpr-v1:free", + "name": "ArliAI: QwQ 32B RpR v1 (free)", + "description": "QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model fine-tuned from Qwen/QwQ-32B using a curated creative writing and roleplay dataset originally developed for the RPMax series.", + "context_length": 32768, + "hugging_face_id": "ArliAI/QwQ-32B-ArliAI-RpR-v1" + }, + { + "id": "arliai/qwq-32b-arliai-rpr-v1", + "name": "ArliAI: QwQ 32B RpR v1", + "description": "QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model fine-tuned from Qwen/QwQ-32B using a curated creative writing and roleplay dataset originally developed for the RPMax series.", + "context_length": 32768, + "hugging_face_id": "ArliAI/QwQ-32B-ArliAI-RpR-v1" + }, + { + "id": "agentica-org/deepcoder-14b-preview:free", + "name": "Agentica: Deepcoder 14B Preview (free)", + "description": "DeepCoder-14B-Preview is a 14B parameter code generation model fine-tuned from DeepSeek-R1-Distill-Qwen-14B using reinforcement learning with GRPO+ and iterative context 
lengthening.", + "context_length": 96000, + "hugging_face_id": "agentica-org/DeepCoder-14B-Preview" + }, + { + "id": "agentica-org/deepcoder-14b-preview", + "name": "Agentica: Deepcoder 14B Preview", + "description": "DeepCoder-14B-Preview is a 14B parameter code generation model fine-tuned from DeepSeek-R1-Distill-Qwen-14B using reinforcement learning with GRPO+ and iterative context lengthening.", + "context_length": 96000, + "hugging_face_id": "agentica-org/DeepCoder-14B-Preview" + }, + { + "id": "x-ai/grok-3-mini-beta", + "name": "xAI: Grok 3 Mini Beta", + "description": "Grok 3 Mini is a lightweight, smaller thinking model.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "x-ai/grok-3-beta", + "name": "xAI: Grok 3 Beta", + "description": "Grok 3 is the latest model from xAI.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1", + "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks.", + "context_length": 131072, + "hugging_face_id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1" + }, + { + "id": "meta-llama/llama-4-maverick:free", + "name": "Meta: Llama 4 Maverick (free)", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per.", + "context_length": 128000, + "hugging_face_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct" + }, + { + "id": "meta-llama/llama-4-maverick", + "name": "Meta: Llama 4 Maverick", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per.", + "context_length": 1048576, + "hugging_face_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct" + }, + { + "id": "meta-llama/llama-4-scout:free", + "name": "Meta: Llama 4 Scout (free)", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B.", + "context_length": 128000, + "hugging_face_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct" + }, + { + "id": "meta-llama/llama-4-scout", + "name": "Meta: Llama 4 Scout", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B.", + "context_length": 327680, + "hugging_face_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct" + }, + { + "id": "qwen/qwen2.5-vl-32b-instruct:free", + "name": "Qwen: Qwen2.5 VL 32B Instruct (free)", + "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities.", + "context_length": 16384, + "hugging_face_id": "Qwen/Qwen2.5-VL-32B-Instruct" + }, + { + "id": "qwen/qwen2.5-vl-32b-instruct", + "name": "Qwen: Qwen2.5 VL 32B Instruct", + "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities.", + "context_length": 16384, + "hugging_face_id": 
"Qwen/Qwen2.5-VL-32B-Instruct" + }, + { + "id": "deepseek/deepseek-chat-v3-0324:free", + "name": "DeepSeek: DeepSeek V3 0324 (free)", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3-0324" + }, + { + "id": "deepseek/deepseek-chat-v3-0324", + "name": "DeepSeek: DeepSeek V3 0324", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3-0324" + }, + { + "id": "openai/o1-pro", + "name": "OpenAI: o1-pro", + "description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-small-3.1-24b-instruct:free", + "name": "Mistral: Mistral Small 3.1 24B (free)", + "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities.", + "context_length": 96000, + "hugging_face_id": "mistralai/Mistral-Small-3.1-24B-Instruct-2503" + }, + { + "id": "mistralai/mistral-small-3.1-24b-instruct", + "name": "Mistral: Mistral Small 3.1 24B", + "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Small-3.1-24B-Instruct-2503" + }, + { + "id": "allenai/olmo-2-0325-32b-instruct", + "name": "AllenAI: Olmo 2 32B Instruct", + "description": "OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model.", + "context_length": 4096, + "hugging_face_id": "allenai/OLMo-2-0325-32B-Instruct" + }, + { + "id": "google/gemma-3-4b-it:free", + "name": "Google: Gemma 3 4B (free)", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 32768, + "hugging_face_id": "google/gemma-3-4b-it" + }, + { + "id": "google/gemma-3-4b-it", + "name": "Google: Gemma 3 4B", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 96000, + "hugging_face_id": "google/gemma-3-4b-it" + }, + { + "id": "google/gemma-3-12b-it:free", + "name": "Google: Gemma 3 12B (free)", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 32768, + "hugging_face_id": "google/gemma-3-12b-it" + }, + { + "id": "google/gemma-3-12b-it", + "name": "Google: Gemma 3 12B", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 131072, + "hugging_face_id": "google/gemma-3-12b-it" + }, + { + "id": "cohere/command-a", + "name": "Cohere: Command A", + "description": "Command A is an open-weights 111B parameter model with a 256k context window focused on delivering great performance across agentic, multilingual, and coding use cases.", + "context_length": 256000, + "hugging_face_id": "CohereForAI/c4ai-command-a-03-2025" + }, + { + "id": "openai/gpt-4o-mini-search-preview", + "name": "OpenAI: GPT-4o-mini Search Preview", + "description": "GPT-4o mini Search Preview is a 
specialized model for web search in Chat Completions.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o-search-preview", + "name": "OpenAI: GPT-4o Search Preview", + "description": "GPT-4o Search Preview is a specialized model for web search in Chat Completions.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "google/gemma-3-27b-it:free", + "name": "Google: Gemma 3 27B (free)", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "google/gemma-3-27b-it", + "name": "Google: Gemma 3 27B", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "thedrummer/skyfall-36b-v2", + "name": "TheDrummer: Skyfall 36B V2", + "description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.", + "context_length": 32768, + "hugging_face_id": "TheDrummer/Skyfall-36B-v2" + }, + { + "id": "microsoft/phi-4-multimodal-instruct", + "name": "Microsoft: Phi 4 Multimodal Instruct", + "description": "Phi-4 Multimodal Instruct is a versatile 5.6B parameter foundation model that combines advanced reasoning and instruction-following capabilities across both text and visual inputs, providing accurate.", + "context_length": 131072, + "hugging_face_id": "microsoft/Phi-4-multimodal-instruct" + }, + { + "id": "perplexity/sonar-reasoning-pro", + "name": "Perplexity: Sonar Reasoning Pro", + "description": "Note: Sonar Pro pricing includes Perplexity search pricing.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "perplexity/sonar-pro", + "name": "Perplexity: Sonar Pro", + "description": "Note: Sonar Pro pricing includes Perplexity search pricing.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "perplexity/sonar-deep-research", + "name": "Perplexity: Sonar Deep Research", + "description": "Sonar Deep Research is a research-focused model designed for multi-step retrieval, synthesis, and reasoning across complex topics.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwq-32b", + "name": "Qwen: QwQ 32B", + "description": "QwQ is the reasoning model of the Qwen series.", + "context_length": 32768, + "hugging_face_id": "Qwen/QwQ-32B" + }, + { + "id": "google/gemini-2.0-flash-lite-001", + "name": "Google: Gemini 2.0 Flash Lite", + "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3.7-sonnet:thinking", + "name": "Anthropic: Claude 3.7 Sonnet (thinking)", + "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3.7-sonnet", + "name": "Anthropic: Claude 3.7 Sonnet", + "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-saba", + "name": "Mistral: Saba", + 
"description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-guard-3-8b", + "name": "Llama Guard 3 8B", + "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-Guard-3-8B" + }, + { + "id": "openai/o3-mini-high", + "name": "OpenAI: o3 Mini High", + "description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.0-flash-001", + "name": "Google: Gemini 2.0 Flash", + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-vl-plus", + "name": "Qwen: Qwen VL Plus", + "description": "Qwen's Enhanced Large Visual Language Model.", + "context_length": 7500, + "hugging_face_id": "" + }, + { + "id": "aion-labs/aion-1.0", + "name": "AionLabs: Aion-1.0", + "description": "Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "aion-labs/aion-1.0-mini", + "name": "AionLabs: Aion-1.0-Mini", + "description": "Aion-1.0-Mini 32B parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic.", + "context_length": 131072, + "hugging_face_id": "FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview" + }, + { + "id": "aion-labs/aion-rp-llama-3.1-8b", + "name": "AionLabs: Aion-RP 1.0 (8B)", + "description": "Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-vl-max", + "name": "Qwen: Qwen VL Max", + "description": "Qwen VL Max is a visual understanding model with 7500 tokens context length.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-turbo", + "name": "Qwen: Qwen-Turbo", + "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen2.5-vl-72b-instruct", + "name": "Qwen: Qwen2.5 VL 72B Instruct", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-VL-72B-Instruct" + }, + { + "id": "qwen/qwen-plus", + "name": "Qwen: Qwen-Plus", + "description": "Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-max", + "name": "Qwen: Qwen-Max ", + "description": "Qwen-Max, based on Qwen2.5, provides the best inference performance among [Qwen models](/qwen), 
especially for complex multi-step tasks.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "openai/o3-mini", + "name": "OpenAI: o3 Mini", + "description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-small-24b-instruct-2501:free", + "name": "Mistral: Mistral Small 3 (free)", + "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-Small-24B-Instruct-2501" + }, + { + "id": "mistralai/mistral-small-24b-instruct-2501", + "name": "Mistral: Mistral Small 3", + "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-Small-24B-Instruct-2501" + }, + { + "id": "deepseek/deepseek-r1-distill-qwen-32b", + "name": "DeepSeek: R1 Distill Qwen 32B", + "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1).", + "context_length": 131072, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" + }, + { + "id": "deepseek/deepseek-r1-distill-qwen-14b", + "name": "DeepSeek: R1 Distill Qwen 14B", + "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek.", + "context_length": 32768, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B" + }, + { + "id": "perplexity/sonar-reasoning", + "name": "Perplexity: Sonar Reasoning", + "description": "Sonar Reasoning is a reasoning model provided by Perplexity based on [DeepSeek R1](/deepseek/deepseek-r1).", + "context_length": 127000, + "hugging_face_id": "" + }, + { + "id": "perplexity/sonar", + "name": "Perplexity: Sonar", + "description": "Sonar is lightweight, affordable, fast, and simple to use — now featuring citations and the ability to customize sources.", + "context_length": 127072, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-r1-distill-llama-70b:free", + "name": "DeepSeek: R1 Distill Llama 70B (free)", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1).", + "context_length": 8192, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" + }, + { + "id": "deepseek/deepseek-r1-distill-llama-70b", + "name": "DeepSeek: R1 Distill Llama 70B", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1).", + "context_length": 131072, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" + }, + { + "id": "deepseek/deepseek-r1:free", + "name": "DeepSeek: R1 (free)", + "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-R1" + }, + { + "id": "deepseek/deepseek-r1", + "name": "DeepSeek: 
R1", + "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-R1" + }, + { + "id": "minimax/minimax-01", + "name": "MiniMax: MiniMax-01", + "description": "MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding.", + "context_length": 1000192, + "hugging_face_id": "MiniMaxAI/MiniMax-Text-01" + }, + { + "id": "mistralai/codestral-2501", + "name": "Mistral: Codestral 2501", + "description": "[Mistral](/mistralai)'s cutting-edge language model for coding.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "microsoft/phi-4", + "name": "Microsoft: Phi 4", + "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed.", + "context_length": 16384, + "hugging_face_id": "microsoft/phi-4" + }, + { + "id": "sao10k/l3.1-70b-hanami-x1", + "name": "Sao10K: Llama 3.1 70B Hanami x1", + "description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).", + "context_length": 16000, + "hugging_face_id": "Sao10K/L3.1-70B-Hanami-x1" + }, + { + "id": "deepseek/deepseek-chat", + "name": "DeepSeek: DeepSeek V3", + "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3" + }, + { + "id": "sao10k/l3.3-euryale-70b", + "name": "Sao10K: Llama 3.3 Euryale 70B", + "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).", + "context_length": 131072, + "hugging_face_id": "Sao10K/L3.3-70B-Euryale-v2.3" + }, + { + "id": "openai/o1", + "name": "OpenAI: o1", + "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "cohere/command-r7b-12-2024", + "name": "Cohere: Command R7B (12-2024)", + "description": "Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.0-flash-exp:free", + "name": "Google: Gemini 2.0 Flash Experimental (free)", + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-3.3-70b-instruct:free", + "name": "Meta: Llama 3.3 70B Instruct (free)", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out).", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.3-70B-Instruct" + }, + { + "id": "meta-llama/llama-3.3-70b-instruct", + "name": "Meta: Llama 3.3 70B Instruct", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out).", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.3-70B-Instruct" + }, + { + "id": 
"amazon/nova-lite-v1", + "name": "Amazon: Nova Lite 1.0", + "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output.", + "context_length": 300000, + "hugging_face_id": "" + }, + { + "id": "amazon/nova-micro-v1", + "name": "Amazon: Nova Micro 1.0", + "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "amazon/nova-pro-v1", + "name": "Amazon: Nova Pro 1.0", + "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks.", + "context_length": 300000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o-2024-11-20", + "name": "OpenAI: GPT-4o (2024-11-20)", + "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-large-2411", + "name": "Mistral Large 2411", + "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-large-2407", + "name": "Mistral Large 2407", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407).", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/pixtral-large-2411", + "name": "Mistral: Pixtral Large 2411", + "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411).", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-2.5-coder-32b-instruct:free", + "name": "Qwen2.5 Coder 32B Instruct (free)", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-Coder-32B-Instruct" + }, + { + "id": "qwen/qwen-2.5-coder-32b-instruct", + "name": "Qwen2.5 Coder 32B Instruct", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-Coder-32B-Instruct" + }, + { + "id": "raifle/sorcererlm-8x22b", + "name": "SorcererLM 8x22B", + "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).", + "context_length": 16000, + "hugging_face_id": "rAIfle/SorcererLM-8x22b-bf16" + }, + { + "id": "thedrummer/unslopnemo-12b", + "name": "TheDrummer: UnslopNemo 12B", + "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", + "context_length": 32768, + "hugging_face_id": "TheDrummer/UnslopNemo-12B-v4.1" + }, + { + "id": "anthropic/claude-3.5-haiku", + "name": "Anthropic: Claude 3.5 Haiku", + "description": "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use.", + 
"context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3.5-haiku-20241022", + "name": "Anthropic: Claude 3.5 Haiku (2024-10-22)", + "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthracite-org/magnum-v4-72b", + "name": "Magnum v4 72B", + "description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and.", + "context_length": 16384, + "hugging_face_id": "anthracite-org/magnum-v4-72b" + }, + { + "id": "anthropic/claude-3.5-sonnet", + "name": "Anthropic: Claude 3.5 Sonnet", + "description": "New Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/ministral-3b", + "name": "Mistral: Ministral 3B", + "description": "Ministral 3B is a 3B parameter model optimized for on-device and edge computing.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/ministral-8b", + "name": "Mistral: Ministral 8B", + "description": "Ministral 8B is an 8B parameter model featuring a unique interleaved sliding-window attention pattern for faster, memory-efficient inference.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-2.5-7b-instruct", + "name": "Qwen: Qwen2.5 7B Instruct", + "description": "Qwen2.5 7B is the latest series of Qwen large language models.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-7B-Instruct" + }, + { + "id": "nvidia/llama-3.1-nemotron-70b-instruct", + "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", + "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses.", + "context_length": 131072, + "hugging_face_id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF" + }, + { + "id": "inflection/inflection-3-productivity", + "name": "Inflection: Inflection 3 Productivity", + "description": "Inflection 3 Productivity is optimized for following instructions.", + "context_length": 8000, + "hugging_face_id": "" + }, + { + "id": "inflection/inflection-3-pi", + "name": "Inflection: Inflection 3 Pi", + "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety.", + "context_length": 8000, + "hugging_face_id": "" + }, + { + "id": "thedrummer/rocinante-12b", + "name": "TheDrummer: Rocinante 12B", + "description": "Rocinante 12B is designed for engaging storytelling and rich prose.", + "context_length": 32768, + "hugging_face_id": "TheDrummer/Rocinante-12B-v1.1" + }, + { + "id": "meta-llama/llama-3.2-90b-vision-instruct", + "name": "Meta: Llama 3.2 90B Vision Instruct", + "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks.", + "context_length": 32768, + "hugging_face_id": "meta-llama/Llama-3.2-90B-Vision-Instruct" + }, + { + "id": "meta-llama/llama-3.2-1b-instruct", + "name": "Meta: Llama 3.2 1B Instruct", + "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis.", + "context_length": 60000, + "hugging_face_id": 
"meta-llama/Llama-3.2-1B-Instruct" + }, + { + "id": "meta-llama/llama-3.2-3b-instruct:free", + "name": "Meta: Llama 3.2 3B Instruct (free)", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.2-3B-Instruct" + }, + { + "id": "meta-llama/llama-3.2-3b-instruct", + "name": "Meta: Llama 3.2 3B Instruct", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.2-3B-Instruct" + }, + { + "id": "meta-llama/llama-3.2-11b-vision-instruct", + "name": "Meta: Llama 3.2 11B Vision Instruct", + "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.2-11B-Vision-Instruct" + }, + { + "id": "qwen/qwen-2.5-72b-instruct:free", + "name": "Qwen2.5 72B Instruct (free)", + "description": "Qwen2.5 72B is the latest series of Qwen large language models.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-72B-Instruct" + }, + { + "id": "qwen/qwen-2.5-72b-instruct", + "name": "Qwen2.5 72B Instruct", + "description": "Qwen2.5 72B is the latest series of Qwen large language models.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-72B-Instruct" + }, + { + "id": "neversleep/llama-3.1-lumimaid-8b", + "name": "NeverSleep: Lumimaid v0.2 8B", + "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1.", + "context_length": 32768, + "hugging_face_id": "NeverSleep/Lumimaid-v0.2-8B" + }, + { + "id": "mistralai/pixtral-12b", + "name": "Mistral: Pixtral 12B", + "description": "The first multi-modal, text+image-to-text model from Mistral AI.", + "context_length": 32768, + "hugging_face_id": "mistralai/Pixtral-12B-2409" + }, + { + "id": "cohere/command-r-08-2024", + "name": "Cohere: Command R (08-2024)", + "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "cohere/command-r-plus-08-2024", + "name": "Cohere: Command R+ (08-2024)", + "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "sao10k/l3.1-euryale-70b", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).", + "context_length": 32768, + "hugging_face_id": "Sao10K/L3.1-70B-Euryale-v2.2" + }, + { + "id": "qwen/qwen-2.5-vl-7b-instruct", + "name": "Qwen: Qwen2.5-VL 7B Instruct", + "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art.", + 
"context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-VL-7B-Instruct" + }, + { + "id": "microsoft/phi-3.5-mini-128k-instruct", + "name": "Microsoft: Phi-3.5 Mini 128K Instruct", + "description": "Phi-3.5 models are lightweight, state-of-the-art open models.", + "context_length": 128000, + "hugging_face_id": "microsoft/Phi-3.5-mini-instruct" + }, + { + "id": "nousresearch/hermes-3-llama-3.1-70b", + "name": "Nous: Hermes 3 70B Instruct", + "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying,.", + "context_length": 65536, + "hugging_face_id": "NousResearch/Hermes-3-Llama-3.1-70B" + }, + { + "id": "nousresearch/hermes-3-llama-3.1-405b:free", + "name": "Nous: Hermes 3 405B Instruct (free)", + "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context.", + "context_length": 131072, + "hugging_face_id": "NousResearch/Hermes-3-Llama-3.1-405B" + }, + { + "id": "nousresearch/hermes-3-llama-3.1-405b", + "name": "Nous: Hermes 3 405B Instruct", + "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context.", + "context_length": 131072, + "hugging_face_id": "NousResearch/Hermes-3-Llama-3.1-405B" + }, + { + "id": "openai/chatgpt-4o-latest", + "name": "OpenAI: ChatGPT-4o", + "description": "OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "sao10k/l3-lunaris-8b", + "name": "Sao10K: Llama 3 8B Lunaris", + "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3.", + "context_length": 8192, + "hugging_face_id": "Sao10K/L3-8B-Lunaris-v1" + }, + { + "id": "openai/gpt-4o-2024-08-06", + "name": "OpenAI: GPT-4o (2024-08-06)", + "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-3.1-405b", + "name": "Meta: Llama 3.1 405B (base)", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors.", + "context_length": 32768, + "hugging_face_id": "meta-llama/llama-3.1-405B" + }, + { + "id": "meta-llama/llama-3.1-70b-instruct", + "name": "Meta: Llama 3.1 70B Instruct", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Meta-Llama-3.1-70B-Instruct" + }, + { + "id": "meta-llama/llama-3.1-405b-instruct", + "name": "Meta: Llama 3.1 405B Instruct", + "description": "The highly anticipated 400B class of Llama3 is here.", + "context_length": 130815, + "hugging_face_id": "meta-llama/Meta-Llama-3.1-405B-Instruct" + }, + { + "id": "meta-llama/llama-3.1-8b-instruct", + "name": "Meta: Llama 3.1 8B Instruct", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Meta-Llama-3.1-8B-Instruct" + }, + { + "id": "mistralai/mistral-nemo:free", + "name": 
"Mistral: Mistral Nemo (free)", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Nemo-Instruct-2407" + }, + { + "id": "mistralai/mistral-nemo", + "name": "Mistral: Mistral Nemo", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Nemo-Instruct-2407" + }, + { + "id": "openai/gpt-4o-mini", + "name": "OpenAI: GPT-4o-mini", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o-mini-2024-07-18", + "name": "OpenAI: GPT-4o-mini (2024-07-18)", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "google/gemma-2-27b-it", + "name": "Google: Gemma 2 27B", + "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).", + "context_length": 8192, + "hugging_face_id": "google/gemma-2-27b-it" + }, + { + "id": "google/gemma-2-9b-it", + "name": "Google: Gemma 2 9B", + "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.", + "context_length": 8192, + "hugging_face_id": "google/gemma-2-9b-it" + }, + { + "id": "anthropic/claude-3.5-sonnet-20240620", + "name": "Anthropic: Claude 3.5 Sonnet (2024-06-20)", + "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "sao10k/l3-euryale-70b", + "name": "Sao10k: Llama 3 Euryale 70B v2.1", + "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).", + "context_length": 8192, + "hugging_face_id": "Sao10K/L3-70B-Euryale-v2.1" + }, + { + "id": "mistralai/mistral-7b-instruct-v0.3", + "name": "Mistral: Mistral 7B Instruct v0.3", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.3" + }, + { + "id": "mistralai/mistral-7b-instruct:free", + "name": "Mistral: Mistral 7B Instruct (free)", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.3" + }, + { + "id": "mistralai/mistral-7b-instruct", + "name": "Mistral: Mistral 7B Instruct", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.3" + }, + { + "id": "nousresearch/hermes-2-pro-llama-3-8b", + "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", + "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling 
and JSON.", + "context_length": 8192, + "hugging_face_id": "NousResearch/Hermes-2-Pro-Llama-3-8B" + }, + { + "id": "microsoft/phi-3-mini-128k-instruct", + "name": "Microsoft: Phi-3 Mini 128K Instruct", + "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following.", + "context_length": 128000, + "hugging_face_id": "microsoft/Phi-3-mini-128k-instruct" + }, + { + "id": "microsoft/phi-3-medium-128k-instruct", + "name": "Microsoft: Phi-3 Medium 128K Instruct", + "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following.", + "context_length": 128000, + "hugging_face_id": "microsoft/Phi-3-medium-128k-instruct" + }, + { + "id": "openai/gpt-4o", + "name": "OpenAI: GPT-4o", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o:extended", + "name": "OpenAI: GPT-4o (extended)", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o-2024-05-13", + "name": "OpenAI: GPT-4o (2024-05-13)", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-guard-2-8b", + "name": "Meta: LlamaGuard 2 8B", + "description": "This safeguard model has 8B parameters and is based on the Llama 3 family.", + "context_length": 8192, + "hugging_face_id": "meta-llama/Meta-Llama-Guard-2-8B" + }, + { + "id": "meta-llama/llama-3-8b-instruct", + "name": "Meta: Llama 3 8B Instruct", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors.", + "context_length": 8192, + "hugging_face_id": "meta-llama/Meta-Llama-3-8B-Instruct" + }, + { + "id": "meta-llama/llama-3-70b-instruct", + "name": "Meta: Llama 3 70B Instruct", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors.", + "context_length": 8192, + "hugging_face_id": "meta-llama/Meta-Llama-3-70B-Instruct" + }, + { + "id": "mistralai/mixtral-8x22b-instruct", + "name": "Mistral: Mixtral 8x22B Instruct", + "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).", + "context_length": 65536, + "hugging_face_id": "mistralai/Mixtral-8x22B-Instruct-v0.1" + }, + { + "id": "microsoft/wizardlm-2-8x22b", + "name": "WizardLM-2 8x22B", + "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model.", + "context_length": 65536, + "hugging_face_id": "microsoft/WizardLM-2-8x22B" + }, + { + "id": "openai/gpt-4-turbo", + "name": "OpenAI: GPT-4 Turbo", + "description": "The latest GPT-4 Turbo model with vision capabilities.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3-haiku", + "name": "Anthropic: Claude 3 Haiku", + "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3-opus", + "name": "Anthropic: Claude 3 Opus", + "description": "Claude 3 Opus is Anthropic's most powerful model for 
highly complex tasks.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-large", + "name": "Mistral Large", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`).", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4-turbo-preview", + "name": "OpenAI: GPT-4 Turbo Preview", + "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-3.5-turbo-0613", + "name": "OpenAI: GPT-3.5 Turbo (older v0613)", + "description": "GPT-3.5 Turbo is OpenAI's fastest model.", + "context_length": 4095, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-small", + "name": "Mistral Small", + "description": "With 22 billion parameters, Mistral Small v24.09 offers a convenient mid-point between (Mistral NeMo 12B)[/mistralai/mistral-nemo] and (Mistral Large 2)[/mistralai/mistral-large], providing a.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-tiny", + "name": "Mistral Tiny", + "description": "Note: This model is being deprecated.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-7b-instruct-v0.2", + "name": "Mistral: Mistral 7B Instruct v0.2", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.2" + }, + { + "id": "mistralai/mixtral-8x7b-instruct", + "name": "Mistral: Mixtral 8x7B Instruct", + "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mixtral-8x7B-Instruct-v0.1" + }, + { + "id": "neversleep/noromaid-20b", + "name": "Noromaid 20B", + "description": "A collab between IkariDev and Undi.", + "context_length": 4096, + "hugging_face_id": "NeverSleep/Noromaid-20b-v0.1.1" + }, + { + "id": "alpindale/goliath-120b", + "name": "Goliath 120B", + "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model.", + "context_length": 6144, + "hugging_face_id": "alpindale/goliath-120b" + }, + { + "id": "openrouter/auto", + "name": "Auto Router", + "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.", + "context_length": 2000000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4-1106-preview", + "name": "OpenAI: GPT-4 Turbo (older v1106)", + "description": "The latest GPT-4 Turbo model with vision capabilities.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-7b-instruct-v0.1", + "name": "Mistral: Mistral 7B Instruct v0.1", + "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.", + "context_length": 2824, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.1" + }, + { + "id": "openai/gpt-3.5-turbo-instruct", + "name": "OpenAI: GPT-3.5 Turbo Instruct", + "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations.", + "context_length": 4095, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-3.5-turbo-16k", + 
"name": "OpenAI: GPT-3.5 Turbo 16k", + "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost.", + "context_length": 16385, + "hugging_face_id": "" + }, + { + "id": "mancer/weaver", + "name": "Mancer: Weaver (alpha)", + "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory.", + "context_length": 8000, + "hugging_face_id": "" + }, + { + "id": "undi95/remm-slerp-l2-13b", + "name": "ReMM SLERP 13B", + "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models.", + "context_length": 6144, + "hugging_face_id": "Undi95/ReMM-SLERP-L2-13B" + }, + { + "id": "gryphe/mythomax-l2-13b", + "name": "MythoMax 13B", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay.", + "context_length": 4096, + "hugging_face_id": "Gryphe/MythoMax-L2-13b" + }, + { + "id": "openai/gpt-4-0314", + "name": "OpenAI: GPT-4 (older v0314)", + "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14.", + "context_length": 8191, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4", + "name": "OpenAI: GPT-4", + "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and.", + "context_length": 8191, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-3.5-turbo", + "name": "OpenAI: GPT-3.5 Turbo", + "description": "GPT-3.5 Turbo is OpenAI's fastest model.", + "context_length": 16385, + "hugging_face_id": "" + } + ] +} diff --git a/gui/src/pages/AddNewModel/configs/providers.ts b/gui/src/pages/AddNewModel/configs/providers.ts index 1bab7abeb81..4d2078fb24a 100644 --- a/gui/src/pages/AddNewModel/configs/providers.ts +++ b/gui/src/pages/AddNewModel/configs/providers.ts @@ -3,6 +3,7 @@ import { ModelProviderTags } from "../../../components/modelSelection/utils"; import { completionParamsInputs } from "./completionParamsInputs"; import type { ModelPackage } from "./models"; import { models } from "./models"; +import { openRouterModelsList } from "./openRouterModel"; export interface InputDescriptor { inputType: HTMLInputTypeAttribute; @@ -170,6 +171,29 @@ export const providers: Partial> = { packages: [models.claude4Sonnet, models.claude41Opus, models.claude35Haiku], apiKeyUrl: "https://console.anthropic.com/account/keys", }, + openrouter: { + title: "OpenRouter", + provider: "openrouter", + description: + "OpenRouter provides access to a variety of LLMs including open-source and proprietary models.", + longDescription: `To get started with OpenRouter, sign up for an account at [openrouter.ai](https://openrouter.ai/) and obtain your API key from the dashboard.`, + icon: "openrouter.png", + tags: [ModelProviderTags.RequiresApiKey], + refPage: "openrouter", + apiKeyUrl: "https://openrouter.ai/settings/keys", + collectInputFor: [ + { + inputType: "text", + key: "apiKey", + label: "API Key", + placeholder: "Enter your OpenRouter API key", + required: true, + }, + ...completionParamsInputsConfigs, + ], + packages: openRouterModelsList, + }, + moonshot: { title: "Moonshot", provider: "moonshot",