diff --git a/gui/public/logos/openrouter.png b/gui/public/logos/openrouter.png new file mode 100644 index 00000000000..04f998a53af Binary files /dev/null and b/gui/public/logos/openrouter.png differ diff --git a/gui/src/components/modelSelection/ModelSelectionListbox.tsx b/gui/src/components/modelSelection/ModelSelectionListbox.tsx index 04510b6bb33..87ba2822c56 100644 --- a/gui/src/components/modelSelection/ModelSelectionListbox.tsx +++ b/gui/src/components/modelSelection/ModelSelectionListbox.tsx @@ -2,8 +2,9 @@ import { CheckIcon, ChevronUpDownIcon, CubeIcon, + MagnifyingGlassIcon, } from "@heroicons/react/24/outline"; -import { Fragment } from "react"; +import { Fragment, useEffect, useMemo, useState } from "react"; import { Listbox, ListboxButton, @@ -19,6 +20,34 @@ interface ModelSelectionListboxProps { setSelectedProvider: (val: DisplayInfo) => void; topOptions?: DisplayInfo[]; otherOptions?: DisplayInfo[]; + searchPlaceholder?: string; +} + +/** + * Simple fuzzy search algorithm + * Returns a score based on how well the query matches the text + */ +function fuzzyScore(query: string, text: string): number { + const q = query.toLowerCase(); + const t = text.toLowerCase(); + + if (!q) return 1; // Empty query matches everything + if (!t) return 0; + + let score = 0; + let queryIdx = 0; + let lastMatchIdx = -1; + + for (let i = 0; i < t.length && queryIdx < q.length; i++) { + if (t[i] === q[queryIdx]) { + score += 1 + (lastMatchIdx === i - 1 ? 5 : 0); // Bonus for consecutive matches + lastMatchIdx = i; + queryIdx++; + } + } + + // Return 0 if not all query characters were found + return queryIdx === q.length ? score / t.length : 0; } function ModelSelectionListbox({ @@ -26,9 +55,51 @@ function ModelSelectionListbox({ setSelectedProvider, topOptions = [], otherOptions = [], + searchPlaceholder = "Search models...", }: ModelSelectionListboxProps) { + const [searchQuery, setSearchQuery] = useState(""); + + // Clear search query when provider changes + useEffect(() => { + setSearchQuery(""); + }, [selectedProvider]); + + // Combine and filter options based on fuzzy search + const filteredTopOptions = useMemo(() => { + if (!searchQuery) return topOptions; + return topOptions + .map((opt) => ({ + option: opt, + score: fuzzyScore(searchQuery, opt.title), + })) + .filter(({ score }) => score > 0) + .sort((a, b) => b.score - a.score) + .map(({ option }) => option); + }, [searchQuery, topOptions]); + + const filteredOtherOptions = useMemo(() => { + if (!searchQuery) return otherOptions; + return otherOptions + .map((opt) => ({ + option: opt, + score: fuzzyScore(searchQuery, opt.title), + })) + .filter(({ score }) => score > 0) + .sort((a, b) => b.score - a.score) + .map(({ option }) => option); + }, [searchQuery, otherOptions]); + + const hasResults = + filteredTopOptions.length > 0 || filteredOtherOptions.length > 0; + return ( - + { + setSelectedProvider(value); + setSearchQuery(""); + }} + >
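For reference, here is a minimal standalone sketch of how the fuzzyScore heuristic above ranks option titles. The scoring function is copied from the diff; the rank helper and the model titles are illustrative assumptions, not part of the change.

// Standalone sketch (TypeScript). fuzzyScore is taken verbatim from the diff above.
function fuzzyScore(query: string, text: string): number {
  const q = query.toLowerCase();
  const t = text.toLowerCase();

  if (!q) return 1; // Empty query matches everything
  if (!t) return 0;

  let score = 0;
  let queryIdx = 0;
  let lastMatchIdx = -1;

  for (let i = 0; i < t.length && queryIdx < q.length; i++) {
    if (t[i] === q[queryIdx]) {
      score += 1 + (lastMatchIdx === i - 1 ? 5 : 0); // Bonus for consecutive matches
      lastMatchIdx = i;
      queryIdx++;
    }
  }

  // Normalized by text length, so shorter titles with the same hits rank higher;
  // 0 means the query characters were not all found in order
  return queryIdx === q.length ? score / t.length : 0;
}

// Illustrative helper that ranks titles the same way the component ranks options
function rank(query: string, titles: string[]): string[] {
  return titles
    .map((title) => ({ title, score: fuzzyScore(query, title) }))
    .filter(({ score }) => score > 0)
    .sort((a, b) => b.score - a.score)
    .map(({ title }) => title);
}

// "gpt4" matches "GPT-4.1" ahead of "GPT-4.1 Nano" (same matched characters,
// shorter text), and "Claude Sonnet 4" is dropped because not every query
// character is found in order.
console.log(rank("gpt4", ["GPT-4.1 Nano", "GPT-4.1", "Claude Sonnet 4"]));
// => [ "GPT-4.1", "GPT-4.1 Nano" ]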
@@ -54,87 +125,120 @@ function ModelSelectionListbox({ leaveFrom="opacity-100" leaveTo="opacity-0" > - - {topOptions.length > 0 && ( -
-
- Popular -
- {topOptions.map((option, index) => ( - - ` ${selected ? "bg-list-active" : "bg-input"} hover:bg-list-active hover:text-list-active-foreground relative flex cursor-default cursor-pointer select-none items-center justify-between gap-2 p-1.5 px-3 py-2 pr-4` - } - value={option} - > - {({ selected }) => ( - <> -
- {option.title === "Autodetect" ? ( - - ) : ( - window.vscMediaUrl && - option.icon && ( - - ) - )} - {option.title} -
- {selected && ( -
- ))} + + {/* Search Box */} +
+
+ + setSearchQuery(e.target.value)} + className="bg-background text-foreground placeholder-description-muted w-full border-0 px-2 py-1.5 outline-none" + onClick={(e) => e.stopPropagation()} + />
- )} - {topOptions.length > 0 && otherOptions.length > 0 && ( -
- )} - {otherOptions.length > 0 && ( -
-
- Additional providers +
+ + {/* Results */} +
+ {!hasResults ? ( +
+ No models found matching "{searchQuery}"
- {otherOptions.map((option, index) => ( - - ` ${selected ? "bg-list-active" : "bg-input"} hover:bg-list-active hover:text-list-active-foreground relative flex cursor-default cursor-pointer select-none items-center justify-between gap-2 p-1.5 px-3 py-2 pr-4` - } - value={option} - > - {({ selected }) => ( - <> -
- {option.title === "Autodetect" ? ( - - ) : ( - window.vscMediaUrl && - option.icon && ( - - ) + ) : ( + <> + {filteredTopOptions.length > 0 && ( +
+
+ Popular +
+ {filteredTopOptions.map((option, index) => ( + + ` ${selected ? "bg-list-active" : "bg-input"} hover:bg-list-active hover:text-list-active-foreground relative flex cursor-pointer select-none items-center justify-between gap-2 p-1.5 px-3 py-2 pr-4` + } + value={option} + > + {({ selected }) => ( + <> +
+ {option.title === "Autodetect" ? ( + + ) : ( + window.vscMediaUrl && + option.icon && ( + + ) + )} + {option.title} +
+ {selected && ( +
- - {selected && ( -
+ )} + {filteredTopOptions.length > 0 && + filteredOtherOptions.length > 0 && ( +
)} - - ))} -
- )} + {filteredOtherOptions.length > 0 && ( +
+
+ Additional providers +
+ {filteredOtherOptions.map((option, index) => ( + + ` ${selected ? "bg-list-active" : "bg-input"} hover:bg-list-active hover:text-list-active-foreground relative flex cursor-pointer select-none items-center justify-between gap-2 p-1.5 px-3 py-2 pr-4` + } + value={option} + > + {({ selected }) => ( + <> +
+ {option.title === "Autodetect" ? ( + + ) : ( + window.vscMediaUrl && + option.icon && ( + + ) + )} + {option.title} +
+ + {selected && ( +
+ ))} +
+ )} + + )} +
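As a usage sketch of the updated listbox, here is how a parent component might wire it up. Only the prop names (selectedProvider, setSelectedProvider, topOptions, otherOptions, searchPlaceholder) come from the diff; the ProviderPicker wrapper, the import path, and the DisplayInfo values are assumptions for illustration, and the DisplayInfo shape is assumed to carry at least a title plus an optional icon.

import { useState } from "react";
import ModelSelectionListbox from "./ModelSelectionListbox"; // assumed relative path

// Stand-in for the GUI's DisplayInfo shape; the component only reads title/icon here
interface DisplayInfo {
  title: string;
  icon?: string;
}

// Hypothetical provider entries; real ones come from the provider configs in AddModelForm
const topOptions: DisplayInfo[] = [
  { title: "OpenAI", icon: "openai.png" },
  { title: "Anthropic", icon: "anthropic.png" },
];
const otherOptions: DisplayInfo[] = [{ title: "OpenRouter", icon: "openrouter.png" }];

function ProviderPicker() {
  // The search box state lives inside the listbox; the parent only tracks the selection
  const [selected, setSelected] = useState<DisplayInfo>(topOptions[0]);

  return (
    <ModelSelectionListbox
      selectedProvider={selected}
      setSelectedProvider={setSelected}
      topOptions={topOptions}
      otherOptions={otherOptions}
      searchPlaceholder="Search providers..."
    />
  );
}

export default ProviderPicker;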
diff --git a/gui/src/forms/AddModelForm.tsx b/gui/src/forms/AddModelForm.tsx index 61a7142992e..3d24816c4d0 100644 --- a/gui/src/forms/AddModelForm.tsx +++ b/gui/src/forms/AddModelForm.tsx @@ -47,6 +47,7 @@ export function AddModelForm({ providers["gemini"]?.title || "", providers["azure"]?.title || "", providers["ollama"]?.title || "", + providers["openrouter"]?.title || "", ]; const allProviders = Object.entries(providers) @@ -149,6 +150,7 @@ export function AddModelForm({ }} topOptions={popularProviders} otherOptions={otherProviders} + searchPlaceholder="Search providers..." /> Don't see your provider?{" "} diff --git a/gui/src/pages/AddNewModel/configs/openRouterModel.ts b/gui/src/pages/AddNewModel/configs/openRouterModel.ts new file mode 100644 index 00000000000..2c5ba5b4ac2 --- /dev/null +++ b/gui/src/pages/AddNewModel/configs/openRouterModel.ts @@ -0,0 +1,74 @@ +import { ModelPackage } from "./models"; +import openRouterModelsData from "./openRouterModels.json"; + +interface OpenRouterModel { + id: string; + name: string; + description: string; + context_length: number; + hugging_face_id: string; +} + +/** + * Convert OpenRouter model data to ModelPackage format + */ +function convertOpenRouterModelToPackage(model: OpenRouterModel): ModelPackage { + // Extract provider name from id (e.g., "openai/gpt-5.1" -> "openai") + const [provider] = model.id.split("/"); + + return { + title: model.name, + description: model.description, + refUrl: `https://openrouter.ai/models/${model.id}`, + params: { + model: model.id, + contextLength: model.context_length, + }, + isOpenSource: !!model.hugging_face_id, + tags: [provider as any], + }; +} + +/** + * Generate ModelPackage objects from OpenRouter models JSON + */ +export function generateOpenRouterModels(): { + [key: string]: ModelPackage; +} { + const models: { [key: string]: ModelPackage } = {}; + + const data = openRouterModelsData as { data: OpenRouterModel[] }; + + if (!data.data || !Array.isArray(data.data)) { + console.warn("Invalid OpenRouter models data structure"); + return models; + } + + data.data.forEach((model: OpenRouterModel) => { + if (!model.id || !model.name) { + console.warn("Skipping model with missing id or name", model); + return; + } + + // Create a unique key from the model id (replace slashes and dots with underscores) + const key = model.id.replace(/[\/.]/g, "_"); + + try { + models[key] = convertOpenRouterModelToPackage(model); + } catch (error) { + console.error(`Failed to convert model ${model.id}:`, error); + } + }); + + return models; +} + +/** + * Export all OpenRouter models as a pre-generated object + */ +export const openRouterModels = generateOpenRouterModels(); + +/** + * Export OpenRouter models as an array for use in provider packages + */ +export const openRouterModelsList = Object.values(openRouterModels); diff --git a/gui/src/pages/AddNewModel/configs/openRouterModels.json b/gui/src/pages/AddNewModel/configs/openRouterModels.json new file mode 100644 index 00000000000..e4c6fe0e63e --- /dev/null +++ b/gui/src/pages/AddNewModel/configs/openRouterModels.json @@ -0,0 +1,2412 @@ +{ + "data": [ + { + "id": "openai/gpt-5.1", + "name": "OpenAI: GPT-5.1", + "description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5.1-chat", + "name": "OpenAI: GPT-5.1 Chat", + "description": 
"GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5.1-codex", + "name": "OpenAI: GPT-5.1-Codex", + "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5.1-codex-mini", + "name": "OpenAI: GPT-5.1-Codex-Mini", + "description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "kwaipilot/kat-coder-pro:free", + "name": "Kwaipilot: KAT-Coder-Pro V1 (free)", + "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "moonshotai/kimi-linear-48b-a3b-instruct", + "name": "MoonshotAI: Kimi Linear 48B A3B Instruct", + "description": "Kimi Linear is a hybrid linear attention architecture that outperforms traditional full attention methods across various contexts, including short, long, and reinforcement learning (RL) scaling.", + "context_length": 1048576, + "hugging_face_id": "moonshotai/Kimi-Linear-48B-A3B-Instruct" + }, + { + "id": "moonshotai/kimi-k2-thinking", + "name": "MoonshotAI: Kimi K2 Thinking", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning.", + "context_length": 262144, + "hugging_face_id": "moonshotai/Kimi-K2-Thinking" + }, + { + "id": "amazon/nova-premier-v1", + "name": "Amazon: Nova Premier 1.0", + "description": "Amazon Nova Premier is the most capable of Amazon’s multimodal models for complex reasoning tasks and for use as the best teacher for distilling custom models.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "perplexity/sonar-pro-search", + "name": "Perplexity: Sonar Pro Search", + "description": "Exclusively available on the OpenRouter API, Sonar Pro's new Pro Search mode is Perplexity's most advanced agentic search system.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/voxtral-small-24b-2507", + "name": "Mistral: Voxtral Small 24B 2507", + "description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance.", + "context_length": 32000, + "hugging_face_id": "mistralai/Voxtral-Small-24B-2507" + }, + { + "id": "openai/gpt-oss-safeguard-20b", + "name": "OpenAI: gpt-oss-safeguard-20b", + "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-safeguard-20b" + }, + { + "id": "nvidia/nemotron-nano-12b-v2-vl:free", + "name": "NVIDIA: Nemotron Nano 12B 2 VL (free)", + "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence.", + "context_length": 128000, + "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16" + }, + { + "id": "nvidia/nemotron-nano-12b-v2-vl", + "name": "NVIDIA: Nemotron Nano 12B 2 VL", + "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document 
intelligence.", + "context_length": 131072, + "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16" + }, + { + "id": "minimax/minimax-m2", + "name": "MiniMax: MiniMax M2", + "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows.", + "context_length": 204800, + "hugging_face_id": "MiniMaxAI/MiniMax-M2" + }, + { + "id": "liquid/lfm2-8b-a1b", + "name": "LiquidAI/LFM2-8B-A1B", + "description": "Model created via inbox interface.", + "context_length": 32768, + "hugging_face_id": "LiquidAI/LFM2-8B-A1B" + }, + { + "id": "liquid/lfm-2.2-6b", + "name": "LiquidAI/LFM2-2.6B", + "description": "LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment.", + "context_length": 32768, + "hugging_face_id": "LiquidAI/LFM2-2.6B" + }, + { + "id": "ibm-granite/granite-4.0-h-micro", + "name": "IBM: Granite 4.0 Micro", + "description": "Granite-4.0-H-Micro is a 3B parameter from the Granite 4 family of models.", + "context_length": 131000, + "hugging_face_id": "ibm-granite/granite-4.0-h-micro" + }, + { + "id": "deepcogito/cogito-v2-preview-llama-405b", + "name": "Deep Cogito: Cogito V2 Preview Llama 405B", + "description": "Cogito v2 405B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection.", + "context_length": 32768, + "hugging_face_id": "deepcogito/cogito-v2-preview-llama-405B" + }, + { + "id": "openai/gpt-5-image-mini", + "name": "OpenAI: GPT-5 Image Mini", + "description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-haiku-4.5", + "name": "Anthropic: Claude Haiku 4.5", + "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-vl-8b-thinking", + "name": "Qwen: Qwen3 VL 8B Thinking", + "description": "Qwen3-VL-8B-Thinking is the reasoning-optimized variant of the Qwen3-VL-8B multimodal model, designed for advanced visual and textual reasoning across complex scenes, documents, and temporal sequences.", + "context_length": 256000, + "hugging_face_id": "Qwen/Qwen3-VL-8B-Thinking" + }, + { + "id": "qwen/qwen3-vl-8b-instruct", + "name": "Qwen: Qwen3 VL 8B Instruct", + "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video.", + "context_length": 131072, + "hugging_face_id": "Qwen/Qwen3-VL-8B-Instruct" + }, + { + "id": "openai/gpt-5-image", + "name": "OpenAI: GPT-5 Image", + "description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's most advanced language model with state-of-the-art image generation capabilities.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "inclusionai/ring-1t", + "name": "inclusionAI: Ring 1T", + "description": "Ring-1T has undergone continued scaling with large-scale verifiable reward reinforcement learning (RLVR) training, further unlocking the natural language reasoning capabilities of the.", + "context_length": 131072, + "hugging_face_id": "inclusionAI/Ring-1T" + }, + { + 
"id": "inclusionai/ling-1t", + "name": "inclusionAI: Ling-1T", + "description": "Ling-1T is a trillion-parameter open-weight large language model developed by inclusionAI and released under the MIT license.", + "context_length": 131072, + "hugging_face_id": "inclusionAI/Ling-1T" + }, + { + "id": "openai/o3-deep-research", + "name": "OpenAI: o3 Deep Research", + "description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "openai/o4-mini-deep-research", + "name": "OpenAI: o4 Mini Deep Research", + "description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", + "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context.", + "context_length": 131072, + "hugging_face_id": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5" + }, + { + "id": "baidu/ernie-4.5-21b-a3b-thinking", + "name": "Baidu: ERNIE 4.5 21B A3B Thinking", + "description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation,.", + "context_length": 131072, + "hugging_face_id": "baidu/ERNIE-4.5-21B-A3B-Thinking" + }, + { + "id": "google/gemini-2.5-flash-image", + "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", + "description": "Gemini 2.5 Flash Image, a.k.a.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-vl-30b-a3b-thinking", + "name": "Qwen: Qwen3 VL 30B A3B Thinking", + "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos.", + "context_length": 131072, + "hugging_face_id": "Qwen/Qwen3-VL-30B-A3B-Thinking" + }, + { + "id": "qwen/qwen3-vl-30b-a3b-instruct", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-VL-30B-A3B-Instruct" + }, + { + "id": "openai/gpt-5-pro", + "name": "OpenAI: GPT-5 Pro", + "description": "GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "z-ai/glm-4.6", + "name": "Z.AI: GLM 4.6", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex.", + "context_length": 202752, + "hugging_face_id": "" + }, + { + "id": "z-ai/glm-4.6:exacto", + "name": "Z.AI: GLM 4.6 (exacto)", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex.", + "context_length": 202752, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-sonnet-4.5", + "name": "Anthropic: Claude Sonnet 4.5", + 
"description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-v3.2-exp", + "name": "DeepSeek: DeepSeek V3.2 Exp", + "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.2-Exp" + }, + { + "id": "thedrummer/cydonia-24b-v4.1", + "name": "TheDrummer: Cydonia 24B V4.1", + "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", + "context_length": 131072, + "hugging_face_id": "thedrummer/cydonia-24b-v4.1" + }, + { + "id": "relace/relace-apply-3", + "name": "Relace: Relace Apply 3", + "description": "Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-flash-preview-09-2025", + "name": "Google: Gemini 2.5 Flash Preview 09-2025", + "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-flash-lite-preview-09-2025", + "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-vl-235b-a22b-thinking", + "name": "Qwen: Qwen3 VL 235B A22B Thinking", + "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-VL-235B-A22B-Thinking" + }, + { + "id": "qwen/qwen3-vl-235b-a22b-instruct", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-VL-235B-A22B-Instruct" + }, + { + "id": "qwen/qwen3-max", + "name": "Qwen: Qwen3 Max", + "description": "Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-coder-plus", + "name": "Qwen: Qwen3 Coder Plus", + "description": "Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5-codex", + "name": "OpenAI: GPT-5 Codex", + "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-v3.1-terminus", + "name": "DeepSeek: DeepSeek V3.1 Terminus", + "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's 
original capabilities while addressing issues reported by users, including language.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.1-Terminus" + }, + { + "id": "deepseek/deepseek-v3.1-terminus:exacto", + "name": "DeepSeek: DeepSeek V3.1 Terminus (exacto)", + "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language.", + "context_length": 131072, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.1-Terminus" + }, + { + "id": "x-ai/grok-4-fast", + "name": "xAI: Grok 4 Fast", + "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window.", + "context_length": 2000000, + "hugging_face_id": "" + }, + { + "id": "alibaba/tongyi-deepresearch-30b-a3b:free", + "name": "Tongyi DeepResearch 30B A3B (free)", + "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token.", + "context_length": 131072, + "hugging_face_id": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B" + }, + { + "id": "alibaba/tongyi-deepresearch-30b-a3b", + "name": "Tongyi DeepResearch 30B A3B", + "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token.", + "context_length": 131072, + "hugging_face_id": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B" + }, + { + "id": "qwen/qwen3-coder-flash", + "name": "Qwen: Qwen3 Coder Flash", + "description": "Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/afm-4.5b", + "name": "Arcee AI: AFM 4.5B", + "description": "AFM-4.5B is a 4.5 billion parameter instruction-tuned language model developed by Arcee AI.", + "context_length": 65536, + "hugging_face_id": "arcee-ai/AFM-4.5B" + }, + { + "id": "opengvlab/internvl3-78b", + "name": "OpenGVLab: InternVL3 78B", + "description": "The InternVL3 series is an advanced multimodal large language model (MLLM).", + "context_length": 32768, + "hugging_face_id": "OpenGVLab/InternVL3-78B" + }, + { + "id": "qwen/qwen3-next-80b-a3b-thinking", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Next-80B-A3B-Thinking" + }, + { + "id": "qwen/qwen3-next-80b-a3b-instruct", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Next-80B-A3B-Instruct" + }, + { + "id": "meituan/longcat-flash-chat:free", + "name": "Meituan: LongCat Flash Chat (free)", + "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts (MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input.", + "context_length": 131072, + "hugging_face_id": "meituan-longcat/LongCat-Flash-Chat" + }, + { + "id": "meituan/longcat-flash-chat", + "name": "Meituan: LongCat Flash Chat", + "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts 
(MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input.", + "context_length": 131072, + "hugging_face_id": "meituan-longcat/LongCat-Flash-Chat" + }, + { + "id": "qwen/qwen-plus-2025-07-28", + "name": "Qwen: Qwen Plus 0728", + "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-plus-2025-07-28:thinking", + "name": "Qwen: Qwen Plus 0728 (thinking)", + "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "nvidia/nemotron-nano-9b-v2:free", + "name": "NVIDIA: Nemotron Nano 9B V2 (free)", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks.", + "context_length": 128000, + "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2" + }, + { + "id": "nvidia/nemotron-nano-9b-v2", + "name": "NVIDIA: Nemotron Nano 9B V2", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks.", + "context_length": 131072, + "hugging_face_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2" + }, + { + "id": "moonshotai/kimi-k2-0905", + "name": "MoonshotAI: Kimi K2 0905", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2).", + "context_length": 262144, + "hugging_face_id": "moonshotai/Kimi-K2-Instruct-0905" + }, + { + "id": "moonshotai/kimi-k2-0905:exacto", + "name": "MoonshotAI: Kimi K2 0905 (exacto)", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2).", + "context_length": 262144, + "hugging_face_id": "moonshotai/Kimi-K2-Instruct-0905" + }, + { + "id": "deepcogito/cogito-v2-preview-llama-70b", + "name": "Deep Cogito: Cogito V2 Preview Llama 70B", + "description": "Cogito v2 70B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection.", + "context_length": 32768, + "hugging_face_id": "deepcogito/cogito-v2-preview-llama-70B" + }, + { + "id": "deepcogito/cogito-v2-preview-llama-109b-moe", + "name": "Cogito V2 Preview Llama 109B", + "description": "An instruction-tuned, hybrid-reasoning Mixture-of-Experts model built on Llama-4-Scout-17B-16E.", + "context_length": 32767, + "hugging_face_id": "deepcogito/cogito-v2-preview-llama-109B-MoE" + }, + { + "id": "deepcogito/cogito-v2-preview-deepseek-671b", + "name": "Deep Cogito: Cogito V2 Preview Deepseek 671B", + "description": "Cogito v2 is a multilingual, instruction-tuned Mixture of Experts (MoE) large language model with 671 billion parameters.", + "context_length": 163840, + "hugging_face_id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE" + }, + { + "id": "stepfun-ai/step3", + "name": "StepFun: Step3", + "description": "Step3 is a cutting-edge multimodal reasoning model—built on a Mixture-of-Experts architecture with 321B total parameters and 38B active.", + "context_length": 65536, + "hugging_face_id": "stepfun-ai/step3" + }, + { + "id": "qwen/qwen3-30b-a3b-thinking-2507", + "name": "Qwen: Qwen3 30B A3B Thinking 2507", + "description": 
"Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-30B-A3B-Thinking-2507" + }, + { + "id": "x-ai/grok-code-fast-1", + "name": "xAI: Grok Code Fast 1", + "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "nousresearch/hermes-4-70b", + "name": "Nous: Hermes 4 70B", + "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B.", + "context_length": 131072, + "hugging_face_id": "NousResearch/Hermes-4-70B" + }, + { + "id": "nousresearch/hermes-4-405b", + "name": "Nous: Hermes 4 405B", + "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research.", + "context_length": 131072, + "hugging_face_id": "NousResearch/Hermes-4-405B" + }, + { + "id": "google/gemini-2.5-flash-image-preview", + "name": "Google: Gemini 2.5 Flash Image Preview (Nano Banana)", + "description": "Gemini 2.5 Flash Image Preview, a.k.a.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-chat-v3.1:free", + "name": "DeepSeek: DeepSeek V3.1 (free)", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates.", + "context_length": 163800, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.1" + }, + { + "id": "deepseek/deepseek-chat-v3.1", + "name": "DeepSeek: DeepSeek V3.1", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3.1" + }, + { + "id": "openai/gpt-4o-audio-preview", + "name": "OpenAI: GPT-4o Audio", + "description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-medium-3.1", + "name": "Mistral: Mistral Medium 3.1", + "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "baidu/ernie-4.5-21b-a3b", + "name": "Baidu: ERNIE 4.5 21B A3B", + "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through.", + "context_length": 120000, + "hugging_face_id": "baidu/ERNIE-4.5-21B-A3B-PT" + }, + { + "id": "baidu/ernie-4.5-vl-28b-a3b", + "name": "Baidu: ERNIE 4.5 VL 28B A3B", + "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative.", + "context_length": 30000, + "hugging_face_id": "baidu/ERNIE-4.5-VL-28B-A3B-PT" + }, + { + "id": "z-ai/glm-4.5v", + "name": "Z.AI: GLM 4.5V", + "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications.", + "context_length": 65536, + "hugging_face_id": "zai-org/GLM-4.5V" + }, + { + "id": "ai21/jamba-mini-1.7", + "name": "AI21: 
Jamba Mini 1.7", + "description": "Jamba Mini 1.7 is a compact and efficient member of the Jamba open model family, incorporating key improvements in grounding and instruction-following while maintaining the benefits of the.", + "context_length": 256000, + "hugging_face_id": "ai21labs/AI21-Jamba-Mini-1.7" + }, + { + "id": "ai21/jamba-large-1.7", + "name": "AI21: Jamba Large 1.7", + "description": "Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency.", + "context_length": 256000, + "hugging_face_id": "ai21labs/AI21-Jamba-Large-1.7" + }, + { + "id": "openai/gpt-5-chat", + "name": "OpenAI: GPT-5 Chat", + "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5", + "name": "OpenAI: GPT-5", + "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5-mini", + "name": "OpenAI: GPT-5 Mini", + "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-5-nano", + "name": "OpenAI: GPT-5 Nano", + "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments.", + "context_length": 400000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-oss-120b", + "name": "OpenAI: gpt-oss-120b", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-120b" + }, + { + "id": "openai/gpt-oss-120b:exacto", + "name": "OpenAI: gpt-oss-120b (exacto)", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-120b" + }, + { + "id": "openai/gpt-oss-20b:free", + "name": "OpenAI: gpt-oss-20b (free)", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-20b" + }, + { + "id": "openai/gpt-oss-20b", + "name": "OpenAI: gpt-oss-20b", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license.", + "context_length": 131072, + "hugging_face_id": "openai/gpt-oss-20b" + }, + { + "id": "anthropic/claude-opus-4.1", + "name": "Anthropic: Claude Opus 4.1", + "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/codestral-2508", + "name": "Mistral: Codestral 2508", + "description": "Mistral's cutting-edge language model for coding released end of July 2025.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-coder-30b-a3b-instruct", + "name": "Qwen: Qwen3 Coder 30B A3B Instruct", + "description": 
"Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding,.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct" + }, + { + "id": "qwen/qwen3-30b-a3b-instruct-2507", + "name": "Qwen: Qwen3 30B A3B Instruct 2507", + "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-30B-A3B-Instruct-2507" + }, + { + "id": "z-ai/glm-4.5", + "name": "Z.AI: GLM 4.5", + "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications.", + "context_length": 131072, + "hugging_face_id": "zai-org/GLM-4.5" + }, + { + "id": "z-ai/glm-4.5-air:free", + "name": "Z.AI: GLM 4.5 Air (free)", + "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications.", + "context_length": 131072, + "hugging_face_id": "zai-org/GLM-4.5-Air" + }, + { + "id": "z-ai/glm-4.5-air", + "name": "Z.AI: GLM 4.5 Air", + "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications.", + "context_length": 131072, + "hugging_face_id": "zai-org/GLM-4.5-Air" + }, + { + "id": "qwen/qwen3-235b-a22b-thinking-2507", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-235B-A22B-Thinking-2507" + }, + { + "id": "z-ai/glm-4-32b", + "name": "Z.AI: GLM 4 32B ", + "description": "GLM 4 32B is a cost-effective foundation language model.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-coder:free", + "name": "Qwen: Qwen3 Coder 480B A35B (free)", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team.", + "context_length": 262000, + "hugging_face_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct" + }, + { + "id": "qwen/qwen3-coder", + "name": "Qwen: Qwen3 Coder 480B A35B", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct" + }, + { + "id": "qwen/qwen3-coder:exacto", + "name": "Qwen: Qwen3 Coder 480B A35B (exacto)", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct" + }, + { + "id": "bytedance/ui-tars-1.5-7b", + "name": "ByteDance: UI-TARS 7B ", + "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games.", + "context_length": 128000, + "hugging_face_id": "ByteDance-Seed/UI-TARS-1.5-7B" + }, + { + "id": "google/gemini-2.5-flash-lite", + "name": "Google: Gemini 2.5 Flash Lite", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency.", + "context_length": 1048576, + "hugging_face_id": 
"" + }, + { + "id": "qwen/qwen3-235b-a22b-2507", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass.", + "context_length": 262144, + "hugging_face_id": "Qwen/Qwen3-235B-A22B-Instruct-2507" + }, + { + "id": "switchpoint/router", + "name": "Switchpoint Router", + "description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "moonshotai/kimi-k2:free", + "name": "MoonshotAI: Kimi K2 0711 (free)", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass.", + "context_length": 32768, + "hugging_face_id": "moonshotai/Kimi-K2-Instruct" + }, + { + "id": "moonshotai/kimi-k2", + "name": "MoonshotAI: Kimi K2 0711", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass.", + "context_length": 131072, + "hugging_face_id": "moonshotai/Kimi-K2-Instruct" + }, + { + "id": "thudm/glm-4.1v-9b-thinking", + "name": "THUDM: GLM 4.1V 9B Thinking", + "description": "GLM-4.1V-9B-Thinking is a 9B parameter vision-language model developed by THUDM, based on the GLM-4-9B foundation.", + "context_length": 65536, + "hugging_face_id": "THUDM/GLM-4.1V-9B-Thinking" + }, + { + "id": "mistralai/devstral-medium", + "name": "Mistral: Devstral Medium", + "description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/devstral-small", + "name": "Mistral: Devstral Small 1.1", + "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI.", + "context_length": 128000, + "hugging_face_id": "mistralai/Devstral-Small-2507" + }, + { + "id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free", + "name": "Venice: Uncensored (free)", + "description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai.", + "context_length": 32768, + "hugging_face_id": "cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition" + }, + { + "id": "x-ai/grok-4", + "name": "xAI: Grok 4", + "description": "Grok 4 is xAI's latest reasoning model with a 256k context window.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "google/gemma-3n-e2b-it:free", + "name": "Google: Gemma 3n 2B (free)", + "description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture.", + "context_length": 8192, + "hugging_face_id": "google/gemma-3n-E2B-it" + }, + { + "id": "tencent/hunyuan-a13b-instruct", + "name": "Tencent: Hunyuan A13B Instruct", + "description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for 
reasoning via Chain-of-Thought.", + "context_length": 131072, + "hugging_face_id": "tencent/Hunyuan-A13B-Instruct" + }, + { + "id": "tngtech/deepseek-r1t2-chimera:free", + "name": "TNG: DeepSeek R1T2 Chimera (free)", + "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech.", + "context_length": 163840, + "hugging_face_id": "tngtech/DeepSeek-TNG-R1T2-Chimera" + }, + { + "id": "tngtech/deepseek-r1t2-chimera", + "name": "TNG: DeepSeek R1T2 Chimera", + "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech.", + "context_length": 163840, + "hugging_face_id": "tngtech/DeepSeek-TNG-R1T2-Chimera" + }, + { + "id": "morph/morph-v3-large", + "name": "Morph: Morph V3 Large", + "description": "Morph's high-accuracy apply model for complex code edits.", + "context_length": 262144, + "hugging_face_id": "" + }, + { + "id": "morph/morph-v3-fast", + "name": "Morph: Morph V3 Fast", + "description": "Morph's fastest apply model for code edits.", + "context_length": 81920, + "hugging_face_id": "" + }, + { + "id": "baidu/ernie-4.5-vl-424b-a47b", + "name": "Baidu: ERNIE 4.5 VL 424B A47B ", + "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token.", + "context_length": 123000, + "hugging_face_id": "baidu/ERNIE-4.5-VL-424B-A47B-PT" + }, + { + "id": "baidu/ernie-4.5-300b-a47b", + "name": "Baidu: ERNIE 4.5 300B A47B ", + "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series.", + "context_length": 123000, + "hugging_face_id": "baidu/ERNIE-4.5-300B-A47B-PT" + }, + { + "id": "thedrummer/anubis-70b-v1.1", + "name": "TheDrummer: Anubis 70B V1.1", + "description": "TheDrummer's Anubis v1.1 is an unaligned, creative Llama 3.3 70B model focused on providing character-driven roleplay & stories.", + "context_length": 131072, + "hugging_face_id": "TheDrummer/Anubis-70B-v1.1" + }, + { + "id": "inception/mercury", + "name": "Inception: Mercury", + "description": "Mercury is the first diffusion large language model (dLLM).", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-small-3.2-24b-instruct:free", + "name": "Mistral: Mistral Small 3.2 24B (free)", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Small-3.2-24B-Instruct-2506" + }, + { + "id": "mistralai/mistral-small-3.2-24b-instruct", + "name": "Mistral: Mistral Small 3.2 24B", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Small-3.2-24B-Instruct-2506" + }, + { + "id": "minimax/minimax-m1", + "name": "MiniMax: MiniMax M1", + "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-flash-lite-preview-06-17", + "name": "Google: Gemini 2.5 Flash Lite Preview 06-17", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, 
optimized for ultra-low latency and cost efficiency.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-flash", + "name": "Google: Gemini 2.5 Flash", + "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-pro", + "name": "Google: Gemini 2.5 Pro", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "moonshotai/kimi-dev-72b", + "name": "MoonshotAI: Kimi Dev 72B", + "description": "Kimi-Dev-72B is an open-source large language model fine-tuned for software engineering and issue resolution tasks.", + "context_length": 131072, + "hugging_face_id": "moonshotai/Kimi-Dev-72B" + }, + { + "id": "openai/o3-pro", + "name": "OpenAI: o3 Pro", + "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "x-ai/grok-3-mini", + "name": "xAI: Grok 3 Mini", + "description": "A lightweight model that thinks before responding.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "x-ai/grok-3", + "name": "xAI: Grok 3", + "description": "Grok 3 is the latest model from xAI.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/magistral-small-2506", + "name": "Mistral: Magistral Small 2506", + "description": "Magistral Small is a 24B parameter instruction-tuned model based on Mistral-Small-3.1 (2503), enhanced through supervised fine-tuning on traces from Magistral Medium and further refined via.", + "context_length": 40000, + "hugging_face_id": "mistralai/Magistral-Small-2506" + }, + { + "id": "mistralai/magistral-medium-2506:thinking", + "name": "Mistral: Magistral Medium 2506 (thinking)", + "description": "Magistral is Mistral's first reasoning model.", + "context_length": 40960, + "hugging_face_id": "" + }, + { + "id": "mistralai/magistral-medium-2506", + "name": "Mistral: Magistral Medium 2506", + "description": "Magistral is Mistral's first reasoning model.", + "context_length": 40960, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-pro-preview", + "name": "Google: Gemini 2.5 Pro Preview 06-05", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-r1-0528-qwen3-8b:free", + "name": "DeepSeek: DeepSeek R1 0528 Qwen3 8B (free)", + "description": "DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, pushing its reasoning and inference to the brink of flagship models like O3 and.", + "context_length": 131072, + "hugging_face_id": "deepseek-ai/deepseek-r1-0528-qwen3-8b" + }, + { + "id": "deepseek/deepseek-r1-0528-qwen3-8b", + "name": "DeepSeek: DeepSeek R1 0528 Qwen3 8B", + "description": "DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, pushing its reasoning and inference to the brink of flagship models like O3 and.", + "context_length": 32768, + "hugging_face_id": 
"deepseek-ai/deepseek-r1-0528-qwen3-8b" + }, + { + "id": "deepseek/deepseek-r1-0528:free", + "name": "DeepSeek: R1 0528 (free)", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-0528" + }, + { + "id": "deepseek/deepseek-r1-0528", + "name": "DeepSeek: R1 0528", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-0528" + }, + { + "id": "anthropic/claude-opus-4", + "name": "Anthropic: Claude Opus 4", + "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-sonnet-4", + "name": "Anthropic: Claude Sonnet 4", + "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "mistralai/devstral-small-2505", + "name": "Mistral: Devstral Small 2505", + "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks.", + "context_length": 128000, + "hugging_face_id": "mistralai/Devstral-Small-2505" + }, + { + "id": "google/gemma-3n-e4b-it:free", + "name": "Google: Gemma 3n 4B (free)", + "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets.", + "context_length": 8192, + "hugging_face_id": "google/gemma-3n-E4B-it" + }, + { + "id": "google/gemma-3n-e4b-it", + "name": "Google: Gemma 3n 4B", + "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets.", + "context_length": 32768, + "hugging_face_id": "google/gemma-3n-E4B-it" + }, + { + "id": "openai/codex-mini", + "name": "OpenAI: Codex Mini", + "description": "codex-mini-latest is a fine-tuned version of o4-mini specifically for use in Codex CLI.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-3.3-8b-instruct:free", + "name": "Meta: Llama 3.3 8B Instruct (free)", + "description": "A lightweight and ultra-fast variant of Llama 3.3 70B, for use when quick response times are needed most.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "nousresearch/deephermes-3-mistral-24b-preview", + "name": "Nous: DeepHermes 3 Mistral 24B Preview", + "description": "DeepHermes 3 (Mistral 24B Preview) is an instruction-tuned language model by Nous Research based on Mistral-Small-24B, designed for chat, function calling, and advanced multi-turn reasoning.", + "context_length": 32768, + "hugging_face_id": "NousResearch/DeepHermes-3-Mistral-24B-Preview" + }, + { + "id": "mistralai/mistral-medium-3", + "name": "Mistral: Mistral Medium 3", + "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at 
significantly reduced operational cost.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.5-pro-preview-05-06", + "name": "Google: Gemini 2.5 Pro Preview 05-06", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/spotlight", + "name": "Arcee AI: Spotlight", + "description": "Spotlight is a 7‑billion‑parameter vision‑language model derived from Qwen 2.5‑VL and fine‑tuned by Arcee AI for tight image‑text grounding tasks.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/maestro-reasoning", + "name": "Arcee AI: Maestro Reasoning", + "description": "Maestro Reasoning is Arcee's flagship analysis model: a 32 B‑parameter derivative of Qwen 2.5‑32 B tuned with DPO and chain‑of‑thought RL for step‑by‑step logic.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/virtuoso-large", + "name": "Arcee AI: Virtuoso Large", + "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "arcee-ai/coder-large", + "name": "Arcee AI: Coder Large", + "description": "Coder‑Large is a 32 B‑parameter offspring of Qwen 2.5‑Instruct that has been further trained on permissively‑licensed GitHub, CodeSearchNet and synthetic bug‑fix corpora.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "microsoft/phi-4-reasoning-plus", + "name": "Microsoft: Phi 4 Reasoning Plus", + "description": "Phi-4-reasoning-plus is an enhanced 14B parameter model from Microsoft, fine-tuned from Phi-4 with additional reinforcement learning to boost accuracy on math, science, and code reasoning tasks.", + "context_length": 32768, + "hugging_face_id": "microsoft/Phi-4-reasoning-plus" + }, + { + "id": "inception/mercury-coder", + "name": "Inception: Mercury Coder", + "description": "Mercury Coder is the first diffusion large language model (dLLM).", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen3-4b:free", + "name": "Qwen: Qwen3 4B (free)", + "description": "Qwen3-4B is a 4 billion parameter dense language model from the Qwen3 series, designed to support both general-purpose and reasoning-intensive tasks.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-4B" + }, + { + "id": "deepseek/deepseek-prover-v2", + "name": "DeepSeek: DeepSeek Prover V2", + "description": "DeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-Prover-V2-671B" + }, + { + "id": "meta-llama/llama-guard-4-12b", + "name": "Meta: Llama Guard 4 12B", + "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification.", + "context_length": 163840, + "hugging_face_id": "meta-llama/Llama-Guard-4-12B" + }, + { + "id": "qwen/qwen3-30b-a3b:free", + "name": "Qwen: Qwen3 30B A3B (free)", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-30B-A3B" + }, + { + "id": 
"qwen/qwen3-30b-a3b", + "name": "Qwen: Qwen3 30B A3B", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-30B-A3B" + }, + { + "id": "qwen/qwen3-8b", + "name": "Qwen: Qwen3 8B", + "description": "Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue.", + "context_length": 128000, + "hugging_face_id": "Qwen/Qwen3-8B" + }, + { + "id": "qwen/qwen3-14b:free", + "name": "Qwen: Qwen3 14B (free)", + "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-14B" + }, + { + "id": "qwen/qwen3-14b", + "name": "Qwen: Qwen3 14B", + "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-14B" + }, + { + "id": "qwen/qwen3-32b", + "name": "Qwen: Qwen3 32B", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-32B" + }, + { + "id": "qwen/qwen3-235b-a22b:free", + "name": "Qwen: Qwen3 235B A22B (free)", + "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-235B-A22B" + }, + { + "id": "qwen/qwen3-235b-a22b", + "name": "Qwen: Qwen3 235B A22B", + "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass.", + "context_length": 40960, + "hugging_face_id": "Qwen/Qwen3-235B-A22B" + }, + { + "id": "tngtech/deepseek-r1t-chimera:free", + "name": "TNG: DeepSeek R1T Chimera (free)", + "description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3.", + "context_length": 163840, + "hugging_face_id": "tngtech/DeepSeek-R1T-Chimera" + }, + { + "id": "tngtech/deepseek-r1t-chimera", + "name": "TNG: DeepSeek R1T Chimera", + "description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3.", + "context_length": 163840, + "hugging_face_id": "tngtech/DeepSeek-R1T-Chimera" + }, + { + "id": "microsoft/mai-ds-r1:free", + "name": "Microsoft: MAI DS R1 (free)", + "description": "MAI-DS-R1 is a post-trained variant of DeepSeek-R1 developed by the Microsoft AI team to improve the model’s responsiveness on previously blocked topics while enhancing its safety profile.", + "context_length": 163840, + "hugging_face_id": "microsoft/MAI-DS-R1" + }, + { + "id": "microsoft/mai-ds-r1", + "name": "Microsoft: MAI DS R1", + "description": "MAI-DS-R1 is a post-trained variant of DeepSeek-R1 developed by the Microsoft AI team to improve the model’s responsiveness on previously blocked topics while enhancing its safety profile.", + "context_length": 163840, + "hugging_face_id": 
"microsoft/MAI-DS-R1" + }, + { + "id": "openai/o4-mini-high", + "name": "OpenAI: o4 Mini High", + "description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "openai/o3", + "name": "OpenAI: o3", + "description": "o3 is a well-rounded and powerful model across domains.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "openai/o4-mini", + "name": "OpenAI: o4 Mini", + "description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen2.5-coder-7b-instruct", + "name": "Qwen: Qwen2.5 Coder 7B Instruct", + "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-Coder-7B-Instruct" + }, + { + "id": "openai/gpt-4.1", + "name": "OpenAI: GPT-4.1", + "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning.", + "context_length": 1047576, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4.1-mini", + "name": "OpenAI: GPT-4.1 Mini", + "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost.", + "context_length": 1047576, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4.1-nano", + "name": "OpenAI: GPT-4.1 Nano", + "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series.", + "context_length": 1047576, + "hugging_face_id": "" + }, + { + "id": "eleutherai/llemma_7b", + "name": "EleutherAI: Llemma 7b", + "description": "Llemma 7B is a language model for mathematics.", + "context_length": 4096, + "hugging_face_id": "EleutherAI/llemma_7b" + }, + { + "id": "alfredpros/codellama-7b-instruct-solidity", + "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", + "description": "A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.", + "context_length": 4096, + "hugging_face_id": "AlfredPros/CodeLlama-7b-Instruct-Solidity" + }, + { + "id": "arliai/qwq-32b-arliai-rpr-v1:free", + "name": "ArliAI: QwQ 32B RpR v1 (free)", + "description": "QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model fine-tuned from Qwen/QwQ-32B using a curated creative writing and roleplay dataset originally developed for the RPMax series.", + "context_length": 32768, + "hugging_face_id": "ArliAI/QwQ-32B-ArliAI-RpR-v1" + }, + { + "id": "arliai/qwq-32b-arliai-rpr-v1", + "name": "ArliAI: QwQ 32B RpR v1", + "description": "QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model fine-tuned from Qwen/QwQ-32B using a curated creative writing and roleplay dataset originally developed for the RPMax series.", + "context_length": 32768, + "hugging_face_id": "ArliAI/QwQ-32B-ArliAI-RpR-v1" + }, + { + "id": "agentica-org/deepcoder-14b-preview:free", + "name": "Agentica: Deepcoder 14B Preview (free)", + "description": "DeepCoder-14B-Preview is a 14B parameter code generation model fine-tuned from DeepSeek-R1-Distill-Qwen-14B using reinforcement learning with GRPO+ and iterative context 
lengthening.", + "context_length": 96000, + "hugging_face_id": "agentica-org/DeepCoder-14B-Preview" + }, + { + "id": "agentica-org/deepcoder-14b-preview", + "name": "Agentica: Deepcoder 14B Preview", + "description": "DeepCoder-14B-Preview is a 14B parameter code generation model fine-tuned from DeepSeek-R1-Distill-Qwen-14B using reinforcement learning with GRPO+ and iterative context lengthening.", + "context_length": 96000, + "hugging_face_id": "agentica-org/DeepCoder-14B-Preview" + }, + { + "id": "x-ai/grok-3-mini-beta", + "name": "xAI: Grok 3 Mini Beta", + "description": "Grok 3 Mini is a lightweight, smaller thinking model.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "x-ai/grok-3-beta", + "name": "xAI: Grok 3 Beta", + "description": "Grok 3 is the latest model from xAI.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1", + "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks.", + "context_length": 131072, + "hugging_face_id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1" + }, + { + "id": "meta-llama/llama-4-maverick:free", + "name": "Meta: Llama 4 Maverick (free)", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per.", + "context_length": 128000, + "hugging_face_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct" + }, + { + "id": "meta-llama/llama-4-maverick", + "name": "Meta: Llama 4 Maverick", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per.", + "context_length": 1048576, + "hugging_face_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct" + }, + { + "id": "meta-llama/llama-4-scout:free", + "name": "Meta: Llama 4 Scout (free)", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B.", + "context_length": 128000, + "hugging_face_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct" + }, + { + "id": "meta-llama/llama-4-scout", + "name": "Meta: Llama 4 Scout", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B.", + "context_length": 327680, + "hugging_face_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct" + }, + { + "id": "qwen/qwen2.5-vl-32b-instruct:free", + "name": "Qwen: Qwen2.5 VL 32B Instruct (free)", + "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities.", + "context_length": 16384, + "hugging_face_id": "Qwen/Qwen2.5-VL-32B-Instruct" + }, + { + "id": "qwen/qwen2.5-vl-32b-instruct", + "name": "Qwen: Qwen2.5 VL 32B Instruct", + "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities.", + "context_length": 16384, + "hugging_face_id": 
"Qwen/Qwen2.5-VL-32B-Instruct" + }, + { + "id": "deepseek/deepseek-chat-v3-0324:free", + "name": "DeepSeek: DeepSeek V3 0324 (free)", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3-0324" + }, + { + "id": "deepseek/deepseek-chat-v3-0324", + "name": "DeepSeek: DeepSeek V3 0324", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3-0324" + }, + { + "id": "openai/o1-pro", + "name": "OpenAI: o1-pro", + "description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-small-3.1-24b-instruct:free", + "name": "Mistral: Mistral Small 3.1 24B (free)", + "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities.", + "context_length": 96000, + "hugging_face_id": "mistralai/Mistral-Small-3.1-24B-Instruct-2503" + }, + { + "id": "mistralai/mistral-small-3.1-24b-instruct", + "name": "Mistral: Mistral Small 3.1 24B", + "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Small-3.1-24B-Instruct-2503" + }, + { + "id": "allenai/olmo-2-0325-32b-instruct", + "name": "AllenAI: Olmo 2 32B Instruct", + "description": "OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model.", + "context_length": 4096, + "hugging_face_id": "allenai/OLMo-2-0325-32B-Instruct" + }, + { + "id": "google/gemma-3-4b-it:free", + "name": "Google: Gemma 3 4B (free)", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 32768, + "hugging_face_id": "google/gemma-3-4b-it" + }, + { + "id": "google/gemma-3-4b-it", + "name": "Google: Gemma 3 4B", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 96000, + "hugging_face_id": "google/gemma-3-4b-it" + }, + { + "id": "google/gemma-3-12b-it:free", + "name": "Google: Gemma 3 12B (free)", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 32768, + "hugging_face_id": "google/gemma-3-12b-it" + }, + { + "id": "google/gemma-3-12b-it", + "name": "Google: Gemma 3 12B", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 131072, + "hugging_face_id": "google/gemma-3-12b-it" + }, + { + "id": "cohere/command-a", + "name": "Cohere: Command A", + "description": "Command A is an open-weights 111B parameter model with a 256k context window focused on delivering great performance across agentic, multilingual, and coding use cases.", + "context_length": 256000, + "hugging_face_id": "CohereForAI/c4ai-command-a-03-2025" + }, + { + "id": "openai/gpt-4o-mini-search-preview", + "name": "OpenAI: GPT-4o-mini Search Preview", + "description": "GPT-4o mini Search Preview is a 
specialized model for web search in Chat Completions.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o-search-preview", + "name": "OpenAI: GPT-4o Search Preview", + "description": "GPT-4o Search Preview is a specialized model for web search in Chat Completions.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "google/gemma-3-27b-it:free", + "name": "Google: Gemma 3 27B (free)", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "google/gemma-3-27b-it", + "name": "Google: Gemma 3 27B", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "thedrummer/skyfall-36b-v2", + "name": "TheDrummer: Skyfall 36B V2", + "description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.", + "context_length": 32768, + "hugging_face_id": "TheDrummer/Skyfall-36B-v2" + }, + { + "id": "microsoft/phi-4-multimodal-instruct", + "name": "Microsoft: Phi 4 Multimodal Instruct", + "description": "Phi-4 Multimodal Instruct is a versatile 5.6B parameter foundation model that combines advanced reasoning and instruction-following capabilities across both text and visual inputs, providing accurate.", + "context_length": 131072, + "hugging_face_id": "microsoft/Phi-4-multimodal-instruct" + }, + { + "id": "perplexity/sonar-reasoning-pro", + "name": "Perplexity: Sonar Reasoning Pro", + "description": "Note: Sonar Pro pricing includes Perplexity search pricing.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "perplexity/sonar-pro", + "name": "Perplexity: Sonar Pro", + "description": "Note: Sonar Pro pricing includes Perplexity search pricing.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "perplexity/sonar-deep-research", + "name": "Perplexity: Sonar Deep Research", + "description": "Sonar Deep Research is a research-focused model designed for multi-step retrieval, synthesis, and reasoning across complex topics.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwq-32b", + "name": "Qwen: QwQ 32B", + "description": "QwQ is the reasoning model of the Qwen series.", + "context_length": 32768, + "hugging_face_id": "Qwen/QwQ-32B" + }, + { + "id": "google/gemini-2.0-flash-lite-001", + "name": "Google: Gemini 2.0 Flash Lite", + "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3.7-sonnet:thinking", + "name": "Anthropic: Claude 3.7 Sonnet (thinking)", + "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3.7-sonnet", + "name": "Anthropic: Claude 3.7 Sonnet", + "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-saba", + "name": "Mistral: Saba", + 
"description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-guard-3-8b", + "name": "Llama Guard 3 8B", + "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-Guard-3-8B" + }, + { + "id": "openai/o3-mini-high", + "name": "OpenAI: o3 Mini High", + "description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.0-flash-001", + "name": "Google: Gemini 2.0 Flash", + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-vl-plus", + "name": "Qwen: Qwen VL Plus", + "description": "Qwen's Enhanced Large Visual Language Model.", + "context_length": 7500, + "hugging_face_id": "" + }, + { + "id": "aion-labs/aion-1.0", + "name": "AionLabs: Aion-1.0", + "description": "Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "aion-labs/aion-1.0-mini", + "name": "AionLabs: Aion-1.0-Mini", + "description": "Aion-1.0-Mini 32B parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic.", + "context_length": 131072, + "hugging_face_id": "FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview" + }, + { + "id": "aion-labs/aion-rp-llama-3.1-8b", + "name": "AionLabs: Aion-RP 1.0 (8B)", + "description": "Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-vl-max", + "name": "Qwen: Qwen VL Max", + "description": "Qwen VL Max is a visual understanding model with 7500 tokens context length.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-turbo", + "name": "Qwen: Qwen-Turbo", + "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.", + "context_length": 1000000, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen2.5-vl-72b-instruct", + "name": "Qwen: Qwen2.5 VL 72B Instruct", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-VL-72B-Instruct" + }, + { + "id": "qwen/qwen-plus", + "name": "Qwen: Qwen-Plus", + "description": "Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-max", + "name": "Qwen: Qwen-Max ", + "description": "Qwen-Max, based on Qwen2.5, provides the best inference performance among [Qwen models](/qwen), 
especially for complex multi-step tasks.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "openai/o3-mini", + "name": "OpenAI: o3 Mini", + "description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-small-24b-instruct-2501:free", + "name": "Mistral: Mistral Small 3 (free)", + "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-Small-24B-Instruct-2501" + }, + { + "id": "mistralai/mistral-small-24b-instruct-2501", + "name": "Mistral: Mistral Small 3", + "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-Small-24B-Instruct-2501" + }, + { + "id": "deepseek/deepseek-r1-distill-qwen-32b", + "name": "DeepSeek: R1 Distill Qwen 32B", + "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1).", + "context_length": 131072, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" + }, + { + "id": "deepseek/deepseek-r1-distill-qwen-14b", + "name": "DeepSeek: R1 Distill Qwen 14B", + "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek.", + "context_length": 32768, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B" + }, + { + "id": "perplexity/sonar-reasoning", + "name": "Perplexity: Sonar Reasoning", + "description": "Sonar Reasoning is a reasoning model provided by Perplexity based on [DeepSeek R1](/deepseek/deepseek-r1).", + "context_length": 127000, + "hugging_face_id": "" + }, + { + "id": "perplexity/sonar", + "name": "Perplexity: Sonar", + "description": "Sonar is lightweight, affordable, fast, and simple to use — now featuring citations and the ability to customize sources.", + "context_length": 127072, + "hugging_face_id": "" + }, + { + "id": "deepseek/deepseek-r1-distill-llama-70b:free", + "name": "DeepSeek: R1 Distill Llama 70B (free)", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1).", + "context_length": 8192, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" + }, + { + "id": "deepseek/deepseek-r1-distill-llama-70b", + "name": "DeepSeek: R1 Distill Llama 70B", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1).", + "context_length": 131072, + "hugging_face_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" + }, + { + "id": "deepseek/deepseek-r1:free", + "name": "DeepSeek: R1 (free)", + "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-R1" + }, + { + "id": "deepseek/deepseek-r1", + "name": "DeepSeek: 
R1", + "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-R1" + }, + { + "id": "minimax/minimax-01", + "name": "MiniMax: MiniMax-01", + "description": "MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding.", + "context_length": 1000192, + "hugging_face_id": "MiniMaxAI/MiniMax-Text-01" + }, + { + "id": "mistralai/codestral-2501", + "name": "Mistral: Codestral 2501", + "description": "[Mistral](/mistralai)'s cutting-edge language model for coding.", + "context_length": 256000, + "hugging_face_id": "" + }, + { + "id": "microsoft/phi-4", + "name": "Microsoft: Phi 4", + "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed.", + "context_length": 16384, + "hugging_face_id": "microsoft/phi-4" + }, + { + "id": "sao10k/l3.1-70b-hanami-x1", + "name": "Sao10K: Llama 3.1 70B Hanami x1", + "description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).", + "context_length": 16000, + "hugging_face_id": "Sao10K/L3.1-70B-Hanami-x1" + }, + { + "id": "deepseek/deepseek-chat", + "name": "DeepSeek: DeepSeek V3", + "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions.", + "context_length": 163840, + "hugging_face_id": "deepseek-ai/DeepSeek-V3" + }, + { + "id": "sao10k/l3.3-euryale-70b", + "name": "Sao10K: Llama 3.3 Euryale 70B", + "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).", + "context_length": 131072, + "hugging_face_id": "Sao10K/L3.3-70B-Euryale-v2.3" + }, + { + "id": "openai/o1", + "name": "OpenAI: o1", + "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "cohere/command-r7b-12-2024", + "name": "Cohere: Command R7B (12-2024)", + "description": "Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "google/gemini-2.0-flash-exp:free", + "name": "Google: Gemini 2.0 Flash Experimental (free)", + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro.", + "context_length": 1048576, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-3.3-70b-instruct:free", + "name": "Meta: Llama 3.3 70B Instruct (free)", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out).", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.3-70B-Instruct" + }, + { + "id": "meta-llama/llama-3.3-70b-instruct", + "name": "Meta: Llama 3.3 70B Instruct", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out).", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.3-70B-Instruct" + }, + { + "id": 
"amazon/nova-lite-v1", + "name": "Amazon: Nova Lite 1.0", + "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output.", + "context_length": 300000, + "hugging_face_id": "" + }, + { + "id": "amazon/nova-micro-v1", + "name": "Amazon: Nova Micro 1.0", + "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "amazon/nova-pro-v1", + "name": "Amazon: Nova Pro 1.0", + "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks.", + "context_length": 300000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o-2024-11-20", + "name": "OpenAI: GPT-4o (2024-11-20)", + "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-large-2411", + "name": "Mistral Large 2411", + "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-large-2407", + "name": "Mistral Large 2407", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407).", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/pixtral-large-2411", + "name": "Mistral: Pixtral Large 2411", + "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411).", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-2.5-coder-32b-instruct:free", + "name": "Qwen2.5 Coder 32B Instruct (free)", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-Coder-32B-Instruct" + }, + { + "id": "qwen/qwen-2.5-coder-32b-instruct", + "name": "Qwen2.5 Coder 32B Instruct", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-Coder-32B-Instruct" + }, + { + "id": "raifle/sorcererlm-8x22b", + "name": "SorcererLM 8x22B", + "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).", + "context_length": 16000, + "hugging_face_id": "rAIfle/SorcererLM-8x22b-bf16" + }, + { + "id": "thedrummer/unslopnemo-12b", + "name": "TheDrummer: UnslopNemo 12B", + "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", + "context_length": 32768, + "hugging_face_id": "TheDrummer/UnslopNemo-12B-v4.1" + }, + { + "id": "anthropic/claude-3.5-haiku", + "name": "Anthropic: Claude 3.5 Haiku", + "description": "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use.", + 
"context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3.5-haiku-20241022", + "name": "Anthropic: Claude 3.5 Haiku (2024-10-22)", + "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthracite-org/magnum-v4-72b", + "name": "Magnum v4 72B", + "description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and.", + "context_length": 16384, + "hugging_face_id": "anthracite-org/magnum-v4-72b" + }, + { + "id": "anthropic/claude-3.5-sonnet", + "name": "Anthropic: Claude 3.5 Sonnet", + "description": "New Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/ministral-3b", + "name": "Mistral: Ministral 3B", + "description": "Ministral 3B is a 3B parameter model optimized for on-device and edge computing.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "mistralai/ministral-8b", + "name": "Mistral: Ministral 8B", + "description": "Ministral 8B is an 8B parameter model featuring a unique interleaved sliding-window attention pattern for faster, memory-efficient inference.", + "context_length": 131072, + "hugging_face_id": "" + }, + { + "id": "qwen/qwen-2.5-7b-instruct", + "name": "Qwen: Qwen2.5 7B Instruct", + "description": "Qwen2.5 7B is the latest series of Qwen large language models.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-7B-Instruct" + }, + { + "id": "nvidia/llama-3.1-nemotron-70b-instruct", + "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", + "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses.", + "context_length": 131072, + "hugging_face_id": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF" + }, + { + "id": "inflection/inflection-3-productivity", + "name": "Inflection: Inflection 3 Productivity", + "description": "Inflection 3 Productivity is optimized for following instructions.", + "context_length": 8000, + "hugging_face_id": "" + }, + { + "id": "inflection/inflection-3-pi", + "name": "Inflection: Inflection 3 Pi", + "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety.", + "context_length": 8000, + "hugging_face_id": "" + }, + { + "id": "thedrummer/rocinante-12b", + "name": "TheDrummer: Rocinante 12B", + "description": "Rocinante 12B is designed for engaging storytelling and rich prose.", + "context_length": 32768, + "hugging_face_id": "TheDrummer/Rocinante-12B-v1.1" + }, + { + "id": "meta-llama/llama-3.2-90b-vision-instruct", + "name": "Meta: Llama 3.2 90B Vision Instruct", + "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks.", + "context_length": 32768, + "hugging_face_id": "meta-llama/Llama-3.2-90B-Vision-Instruct" + }, + { + "id": "meta-llama/llama-3.2-1b-instruct", + "name": "Meta: Llama 3.2 1B Instruct", + "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis.", + "context_length": 60000, + "hugging_face_id": 
"meta-llama/Llama-3.2-1B-Instruct" + }, + { + "id": "meta-llama/llama-3.2-3b-instruct:free", + "name": "Meta: Llama 3.2 3B Instruct (free)", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.2-3B-Instruct" + }, + { + "id": "meta-llama/llama-3.2-3b-instruct", + "name": "Meta: Llama 3.2 3B Instruct", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.2-3B-Instruct" + }, + { + "id": "meta-llama/llama-3.2-11b-vision-instruct", + "name": "Meta: Llama 3.2 11B Vision Instruct", + "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Llama-3.2-11B-Vision-Instruct" + }, + { + "id": "qwen/qwen-2.5-72b-instruct:free", + "name": "Qwen2.5 72B Instruct (free)", + "description": "Qwen2.5 72B is the latest series of Qwen large language models.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-72B-Instruct" + }, + { + "id": "qwen/qwen-2.5-72b-instruct", + "name": "Qwen2.5 72B Instruct", + "description": "Qwen2.5 72B is the latest series of Qwen large language models.", + "context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-72B-Instruct" + }, + { + "id": "neversleep/llama-3.1-lumimaid-8b", + "name": "NeverSleep: Lumimaid v0.2 8B", + "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1.", + "context_length": 32768, + "hugging_face_id": "NeverSleep/Lumimaid-v0.2-8B" + }, + { + "id": "mistralai/pixtral-12b", + "name": "Mistral: Pixtral 12B", + "description": "The first multi-modal, text+image-to-text model from Mistral AI.", + "context_length": 32768, + "hugging_face_id": "mistralai/Pixtral-12B-2409" + }, + { + "id": "cohere/command-r-08-2024", + "name": "Cohere: Command R (08-2024)", + "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "cohere/command-r-plus-08-2024", + "name": "Cohere: Command R+ (08-2024)", + "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "sao10k/l3.1-euryale-70b", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).", + "context_length": 32768, + "hugging_face_id": "Sao10K/L3.1-70B-Euryale-v2.2" + }, + { + "id": "qwen/qwen-2.5-vl-7b-instruct", + "name": "Qwen: Qwen2.5-VL 7B Instruct", + "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art.", + 
"context_length": 32768, + "hugging_face_id": "Qwen/Qwen2.5-VL-7B-Instruct" + }, + { + "id": "microsoft/phi-3.5-mini-128k-instruct", + "name": "Microsoft: Phi-3.5 Mini 128K Instruct", + "description": "Phi-3.5 models are lightweight, state-of-the-art open models.", + "context_length": 128000, + "hugging_face_id": "microsoft/Phi-3.5-mini-instruct" + }, + { + "id": "nousresearch/hermes-3-llama-3.1-70b", + "name": "Nous: Hermes 3 70B Instruct", + "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying,.", + "context_length": 65536, + "hugging_face_id": "NousResearch/Hermes-3-Llama-3.1-70B" + }, + { + "id": "nousresearch/hermes-3-llama-3.1-405b:free", + "name": "Nous: Hermes 3 405B Instruct (free)", + "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context.", + "context_length": 131072, + "hugging_face_id": "NousResearch/Hermes-3-Llama-3.1-405B" + }, + { + "id": "nousresearch/hermes-3-llama-3.1-405b", + "name": "Nous: Hermes 3 405B Instruct", + "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context.", + "context_length": 131072, + "hugging_face_id": "NousResearch/Hermes-3-Llama-3.1-405B" + }, + { + "id": "openai/chatgpt-4o-latest", + "name": "OpenAI: ChatGPT-4o", + "description": "OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "sao10k/l3-lunaris-8b", + "name": "Sao10K: Llama 3 8B Lunaris", + "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3.", + "context_length": 8192, + "hugging_face_id": "Sao10K/L3-8B-Lunaris-v1" + }, + { + "id": "openai/gpt-4o-2024-08-06", + "name": "OpenAI: GPT-4o (2024-08-06)", + "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-3.1-405b", + "name": "Meta: Llama 3.1 405B (base)", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors.", + "context_length": 32768, + "hugging_face_id": "meta-llama/llama-3.1-405B" + }, + { + "id": "meta-llama/llama-3.1-70b-instruct", + "name": "Meta: Llama 3.1 70B Instruct", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Meta-Llama-3.1-70B-Instruct" + }, + { + "id": "meta-llama/llama-3.1-405b-instruct", + "name": "Meta: Llama 3.1 405B Instruct", + "description": "The highly anticipated 400B class of Llama3 is here.", + "context_length": 130815, + "hugging_face_id": "meta-llama/Meta-Llama-3.1-405B-Instruct" + }, + { + "id": "meta-llama/llama-3.1-8b-instruct", + "name": "Meta: Llama 3.1 8B Instruct", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors.", + "context_length": 131072, + "hugging_face_id": "meta-llama/Meta-Llama-3.1-8B-Instruct" + }, + { + "id": "mistralai/mistral-nemo:free", + "name": 
"Mistral: Mistral Nemo (free)", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Nemo-Instruct-2407" + }, + { + "id": "mistralai/mistral-nemo", + "name": "Mistral: Mistral Nemo", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.", + "context_length": 131072, + "hugging_face_id": "mistralai/Mistral-Nemo-Instruct-2407" + }, + { + "id": "openai/gpt-4o-mini", + "name": "OpenAI: GPT-4o-mini", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o-mini-2024-07-18", + "name": "OpenAI: GPT-4o-mini (2024-07-18)", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "google/gemma-2-27b-it", + "name": "Google: Gemma 2 27B", + "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).", + "context_length": 8192, + "hugging_face_id": "google/gemma-2-27b-it" + }, + { + "id": "google/gemma-2-9b-it", + "name": "Google: Gemma 2 9B", + "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.", + "context_length": 8192, + "hugging_face_id": "google/gemma-2-9b-it" + }, + { + "id": "anthropic/claude-3.5-sonnet-20240620", + "name": "Anthropic: Claude 3.5 Sonnet (2024-06-20)", + "description": "Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "sao10k/l3-euryale-70b", + "name": "Sao10k: Llama 3 Euryale 70B v2.1", + "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).", + "context_length": 8192, + "hugging_face_id": "Sao10K/L3-70B-Euryale-v2.1" + }, + { + "id": "mistralai/mistral-7b-instruct-v0.3", + "name": "Mistral: Mistral 7B Instruct v0.3", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.3" + }, + { + "id": "mistralai/mistral-7b-instruct:free", + "name": "Mistral: Mistral 7B Instruct (free)", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.3" + }, + { + "id": "mistralai/mistral-7b-instruct", + "name": "Mistral: Mistral 7B Instruct", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.3" + }, + { + "id": "nousresearch/hermes-2-pro-llama-3-8b", + "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", + "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling 
and JSON.", + "context_length": 8192, + "hugging_face_id": "NousResearch/Hermes-2-Pro-Llama-3-8B" + }, + { + "id": "microsoft/phi-3-mini-128k-instruct", + "name": "Microsoft: Phi-3 Mini 128K Instruct", + "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following.", + "context_length": 128000, + "hugging_face_id": "microsoft/Phi-3-mini-128k-instruct" + }, + { + "id": "microsoft/phi-3-medium-128k-instruct", + "name": "Microsoft: Phi-3 Medium 128K Instruct", + "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following.", + "context_length": 128000, + "hugging_face_id": "microsoft/Phi-3-medium-128k-instruct" + }, + { + "id": "openai/gpt-4o", + "name": "OpenAI: GPT-4o", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o:extended", + "name": "OpenAI: GPT-4o (extended)", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4o-2024-05-13", + "name": "OpenAI: GPT-4o (2024-05-13)", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "meta-llama/llama-guard-2-8b", + "name": "Meta: LlamaGuard 2 8B", + "description": "This safeguard model has 8B parameters and is based on the Llama 3 family.", + "context_length": 8192, + "hugging_face_id": "meta-llama/Meta-Llama-Guard-2-8B" + }, + { + "id": "meta-llama/llama-3-8b-instruct", + "name": "Meta: Llama 3 8B Instruct", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors.", + "context_length": 8192, + "hugging_face_id": "meta-llama/Meta-Llama-3-8B-Instruct" + }, + { + "id": "meta-llama/llama-3-70b-instruct", + "name": "Meta: Llama 3 70B Instruct", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors.", + "context_length": 8192, + "hugging_face_id": "meta-llama/Meta-Llama-3-70B-Instruct" + }, + { + "id": "mistralai/mixtral-8x22b-instruct", + "name": "Mistral: Mixtral 8x22B Instruct", + "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).", + "context_length": 65536, + "hugging_face_id": "mistralai/Mixtral-8x22B-Instruct-v0.1" + }, + { + "id": "microsoft/wizardlm-2-8x22b", + "name": "WizardLM-2 8x22B", + "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model.", + "context_length": 65536, + "hugging_face_id": "microsoft/WizardLM-2-8x22B" + }, + { + "id": "openai/gpt-4-turbo", + "name": "OpenAI: GPT-4 Turbo", + "description": "The latest GPT-4 Turbo model with vision capabilities.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3-haiku", + "name": "Anthropic: Claude 3 Haiku", + "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "anthropic/claude-3-opus", + "name": "Anthropic: Claude 3 Opus", + "description": "Claude 3 Opus is Anthropic's most powerful model for 
highly complex tasks.", + "context_length": 200000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-large", + "name": "Mistral Large", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`).", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4-turbo-preview", + "name": "OpenAI: GPT-4 Turbo Preview", + "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-3.5-turbo-0613", + "name": "OpenAI: GPT-3.5 Turbo (older v0613)", + "description": "GPT-3.5 Turbo is OpenAI's fastest model.", + "context_length": 4095, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-small", + "name": "Mistral Small", + "description": "With 22 billion parameters, Mistral Small v24.09 offers a convenient mid-point between (Mistral NeMo 12B)[/mistralai/mistral-nemo] and (Mistral Large 2)[/mistralai/mistral-large], providing a.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-tiny", + "name": "Mistral Tiny", + "description": "Note: This model is being deprecated.", + "context_length": 32768, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-7b-instruct-v0.2", + "name": "Mistral: Mistral 7B Instruct v0.2", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.2" + }, + { + "id": "mistralai/mixtral-8x7b-instruct", + "name": "Mistral: Mixtral 8x7B Instruct", + "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use.", + "context_length": 32768, + "hugging_face_id": "mistralai/Mixtral-8x7B-Instruct-v0.1" + }, + { + "id": "neversleep/noromaid-20b", + "name": "Noromaid 20B", + "description": "A collab between IkariDev and Undi.", + "context_length": 4096, + "hugging_face_id": "NeverSleep/Noromaid-20b-v0.1.1" + }, + { + "id": "alpindale/goliath-120b", + "name": "Goliath 120B", + "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model.", + "context_length": 6144, + "hugging_face_id": "alpindale/goliath-120b" + }, + { + "id": "openrouter/auto", + "name": "Auto Router", + "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.", + "context_length": 2000000, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4-1106-preview", + "name": "OpenAI: GPT-4 Turbo (older v1106)", + "description": "The latest GPT-4 Turbo model with vision capabilities.", + "context_length": 128000, + "hugging_face_id": "" + }, + { + "id": "mistralai/mistral-7b-instruct-v0.1", + "name": "Mistral: Mistral 7B Instruct v0.1", + "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.", + "context_length": 2824, + "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.1" + }, + { + "id": "openai/gpt-3.5-turbo-instruct", + "name": "OpenAI: GPT-3.5 Turbo Instruct", + "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations.", + "context_length": 4095, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-3.5-turbo-16k", + 
"name": "OpenAI: GPT-3.5 Turbo 16k", + "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost.", + "context_length": 16385, + "hugging_face_id": "" + }, + { + "id": "mancer/weaver", + "name": "Mancer: Weaver (alpha)", + "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory.", + "context_length": 8000, + "hugging_face_id": "" + }, + { + "id": "undi95/remm-slerp-l2-13b", + "name": "ReMM SLERP 13B", + "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models.", + "context_length": 6144, + "hugging_face_id": "Undi95/ReMM-SLERP-L2-13B" + }, + { + "id": "gryphe/mythomax-l2-13b", + "name": "MythoMax 13B", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay.", + "context_length": 4096, + "hugging_face_id": "Gryphe/MythoMax-L2-13b" + }, + { + "id": "openai/gpt-4-0314", + "name": "OpenAI: GPT-4 (older v0314)", + "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14.", + "context_length": 8191, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-4", + "name": "OpenAI: GPT-4", + "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and.", + "context_length": 8191, + "hugging_face_id": "" + }, + { + "id": "openai/gpt-3.5-turbo", + "name": "OpenAI: GPT-3.5 Turbo", + "description": "GPT-3.5 Turbo is OpenAI's fastest model.", + "context_length": 16385, + "hugging_face_id": "" + } + ] +} diff --git a/gui/src/pages/AddNewModel/configs/providers.ts b/gui/src/pages/AddNewModel/configs/providers.ts index 1bab7abeb81..4d2078fb24a 100644 --- a/gui/src/pages/AddNewModel/configs/providers.ts +++ b/gui/src/pages/AddNewModel/configs/providers.ts @@ -3,6 +3,7 @@ import { ModelProviderTags } from "../../../components/modelSelection/utils"; import { completionParamsInputs } from "./completionParamsInputs"; import type { ModelPackage } from "./models"; import { models } from "./models"; +import { openRouterModelsList } from "./openRouterModel"; export interface InputDescriptor { inputType: HTMLInputTypeAttribute; @@ -170,6 +171,29 @@ export const providers: Partial> = { packages: [models.claude4Sonnet, models.claude41Opus, models.claude35Haiku], apiKeyUrl: "https://console.anthropic.com/account/keys", }, + openrouter: { + title: "OpenRouter", + provider: "openrouter", + description: + "OpenRouter provides access to a variety of LLMs including open-source and proprietary models.", + longDescription: `To get started with OpenRouter, sign up for an account at [openrouter.ai](https://openrouter.ai/) and obtain your API key from the dashboard.`, + icon: "openrouter.png", + tags: [ModelProviderTags.RequiresApiKey], + refPage: "openrouter", + apiKeyUrl: "https://openrouter.ai/settings/keys", + collectInputFor: [ + { + inputType: "text", + key: "apiKey", + label: "API Key", + placeholder: "Enter your OpenRouter API key", + required: true, + }, + ...completionParamsInputsConfigs, + ], + packages: openRouterModelsList, + }, + moonshot: { title: "Moonshot", provider: "moonshot",