
Commit c153d7b

feat: enable reasoning toggle for ollama models
Signed-off-by: Fred Bricon <[email protected]>
1 parent 80c6797 commit c153d7b

File tree

- core/llm/autodetect.ts
- core/llm/llms/Ollama.ts
- docs/reference.mdx
- docs/reference/json-reference.mdx
- extensions/vscode/config_schema.json
- gui/src/components/mainInput/InputToolbar.tsx
- gui/src/hooks/ParallelListeners.tsx
- gui/src/redux/thunks/streamNormalInput.ts

8 files changed: +56 -21 lines changed

core/llm/autodetect.ts

Lines changed: 27 additions & 1 deletion
@@ -1,4 +1,9 @@
-import { ChatMessage, ModelCapability, TemplateType } from "../index.js";
+import {
+  ChatMessage,
+  ModelCapability,
+  ModelDescription,
+  TemplateType,
+} from "../index.js";
 import { NEXT_EDIT_MODELS } from "./constants.js";
 
 import {
@@ -126,6 +131,26 @@ function modelSupportsImages(
 
   return false;
 }
+
+function modelSupportsThinking(
+  model: ModelDescription | null | undefined,
+): boolean {
+  if (!model) {
+    return false;
+  }
+  if ("anthropic" === model.underlyingProviderName) {
+    return true;
+  }
+  if (model.model.includes("deepseek-r")) {
+    return true;
+  }
+  if (model.completionOptions?.reasoning) {
+    // Reasoning support is forced at the config level. Model might not necessarily support it though!
+    return true;
+  }
+  return false;
+}
+
 const PARALLEL_PROVIDERS: string[] = [
   "anthropic",
   "bedrock",
@@ -421,4 +446,5 @@ export {
   llmCanGenerateInParallel,
   modelSupportsImages,
   modelSupportsNextEdit,
+  modelSupportsThinking,
 };
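
For context, this is roughly how the new helper behaves. A minimal sketch — the model descriptions below are invented, trimmed to just the fields the helper reads, and cast for brevity; they are not real config entries:

import { ModelDescription } from "core";
import { modelSupportsThinking } from "core/llm/autodetect";

// Hypothetical models, for illustration only.
const claude = { underlyingProviderName: "anthropic", model: "claude-3-7-sonnet-latest" } as ModelDescription;
const r1 = { underlyingProviderName: "ollama", model: "deepseek-r1:8b" } as ModelDescription;
const forced = {
  underlyingProviderName: "ollama",
  model: "qwen3:8b",
  completionOptions: { reasoning: true },
} as ModelDescription;
const llama = { underlyingProviderName: "ollama", model: "llama3.1:8b" } as ModelDescription;

modelSupportsThinking(claude);    // true — any Anthropic model
modelSupportsThinking(r1);        // true — model name contains "deepseek-r"
modelSupportsThinking(forced);    // true — reasoning forced at the config level
modelSupportsThinking(llama);     // false — no signal that the model can think
modelSupportsThinking(undefined); // false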

core/llm/llms/Ollama.ts

Lines changed: 2 additions & 1 deletion
@@ -87,6 +87,7 @@ interface OllamaChatOptions extends OllamaBaseOptions {
   tools?: OllamaTool[]; // the tools of the chat, this can be used to keep a tool memory
   // Not supported yet - tools: tools for the model to use if supported. Requires stream to be set to false
   // And correspondingly, tool calls in OllamaChatMessage
+  think?: boolean; // if true the model will be prompted to think about the response before generating it
 }
 
 type OllamaBaseResponse = {
@@ -146,7 +147,6 @@ class Ollama extends BaseLLM implements ModelInstaller {
   private static modelsBeingInstalledMutex = new Mutex();
 
   private fimSupported: boolean = false;
-
   constructor(options: LLMOptions) {
     super(options);
 
@@ -393,6 +393,7 @@ class Ollama extends BaseLLM implements ModelInstaller {
       model: this._getModel(),
       messages: ollamaMessages,
       options: this._getModelFileParams(options),
+      think: options.reasoning,
      keep_alive: options.keepAlive ?? 60 * 30, // 30 minutes
      stream: options.stream,
      // format: options.format, // Not currently in base completion options
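
With the toggle on, the request body POSTed to Ollama's /api/chat endpoint would look roughly like this (a sketch: the model name and message are invented, and `think` only takes effect on an Ollama version and model that support thinking):

POST /api/chat
{
  "model": "deepseek-r1:8b",
  "messages": [{ "role": "user", "content": "Why is the sky blue?" }],
  "options": {},
  "think": true,
  "keep_alive": 1800,
  "stream": true
}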

docs/reference.mdx

Lines changed: 1 addition & 1 deletion
@@ -219,7 +219,7 @@ The `models` section defines the language models used in your configuration. Mod
 - `topP`: The cumulative probability for nucleus sampling.
 - `topK`: Maximum number of tokens considered at each step.
 - `stop`: An array of stop tokens that will terminate the completion.
-- `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ models.
+- `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
 - `reasoningBudgetTokens`: Budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
 
 - `requestOptions`: HTTP request options specific to the model.
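
For example, opting an Ollama model in from config.yaml might look like this (a sketch — the model is illustrative, and `reasoning` is assumed to live under `defaultCompletionOptions` alongside the other options above):

models:
  - name: DeepSeek R1
    provider: ollama
    model: deepseek-r1:8b
    defaultCompletionOptions:
      reasoning: true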

docs/reference/json-reference.mdx

Lines changed: 1 addition & 5 deletions
@@ -191,7 +191,7 @@ Parameters that control the behavior of text generation and completion settings.
 - `keepAlive`: For Ollama, this parameter sets the number of seconds to keep the model loaded after the last request, unloading it from memory if inactive (default: `1800` seconds, or 30 minutes).
 - `numGpu`: For Ollama, this parameter overrides the number of gpu layers that will be used to load the model into VRAM.
 - `useMmap`: For Ollama, this parameter allows the model to be mapped into memory. If disabled can enhance response time on low end devices but will slow down the stream.
-- `reasoning`: Enables thinking/reasoning for Anthropic Claude 3.7+ models.
+- `reasoning`: Enables thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
 - `reasoningBudgetTokens`: Sets budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
 
 Example
@@ -485,8 +485,6 @@ Several experimental config parameters are available, as described below:
 - `applyCodeBlock`: Model title for applying code blocks.
 - `repoMapFileSelection`: Model title for repo map selections.
 
-
-
 - `modelContextProtocolServers`: See [Model Context Protocol](/customize/deep-dives/mcp)
 
 Example
@@ -540,5 +538,3 @@ Some deprecated `config.json` settings are no longer stored in config and have b
 - `codeWrap`
 - `displayRawMarkdown`
 - `showChatScrollbar`
-
-
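
The config.json equivalent might look like this (a hypothetical entry; the title and model name are invented):

{
  "models": [
    {
      "title": "DeepSeek R1 (Ollama)",
      "provider": "ollama",
      "model": "deepseek-r1:8b",
      "completionOptions": { "reasoning": true }
    }
  ]
}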

extensions/vscode/config_schema.json

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@
     },
     "reasoning": {
       "title": "Reasoning",
-      "description": "Enable thinking/reasoning for Anthropic Claude 3.7+ models",
+      "description": "Enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models",
       "type": "boolean"
     },
     "reasoningBudgetTokens": {

gui/src/components/mainInput/InputToolbar.tsx

Lines changed: 7 additions & 2 deletions
@@ -5,7 +5,10 @@ import {
 } from "@heroicons/react/24/outline";
 import { LightBulbIcon as LightBulbIconSolid } from "@heroicons/react/24/solid";
 import { InputModifiers } from "core";
-import { modelSupportsImages } from "core/llm/autodetect";
+import {
+  modelSupportsImages,
+  modelSupportsThinking,
+} from "core/llm/autodetect";
 import { useContext, useRef } from "react";
 import { IdeMessengerContext } from "../../context/IdeMessenger";
 import { useAppDispatch, useAppSelector } from "../../redux/hooks";
@@ -66,6 +69,8 @@ function InputToolbar(props: InputToolbarProps) {
     defaultModel.capabilities,
   );
 
+  const supportsThinking = modelSupportsThinking(defaultModel);
+
   const smallFont = useFontSize(-2);
   const tinyFont = useFontSize(-3);
 
@@ -139,7 +144,7 @@ function InputToolbar(props: InputToolbarProps) {
           </ToolTip>
         </HoverItem>
       )}
-      {defaultModel?.underlyingProviderName === "anthropic" && (
+      {supportsThinking && (
        <HoverItem
          onClick={() =>
            dispatch(setHasReasoningEnabled(!hasReasoningEnabled))

gui/src/hooks/ParallelListeners.tsx

Lines changed: 8 additions & 6 deletions
@@ -19,6 +19,7 @@ import {
 } from "../redux/slices/sessionSlice";
 import { setTTSActive } from "../redux/slices/uiSlice";
 
+import { modelSupportsThinking } from "core/llm/autodetect";
 import { cancelStream } from "../redux/thunks/cancelStream";
 import { handleApplyStateUpdate } from "../redux/thunks/handleApplyStateUpdate";
 import { refreshSessionMetadata } from "../redux/thunks/session";
@@ -79,12 +80,13 @@ function ParallelListeners() {
         document.body.style.fontSize = `${configResult.config.ui.fontSize}px`;
       }
 
-      if (
-        configResult.config?.selectedModelByRole.chat?.completionOptions
-          ?.reasoning
-      ) {
-        dispatch(setHasReasoningEnabled(true));
-      }
+      const chatModel = configResult.config?.selectedModelByRole.chat;
+      const supportsReasoning = modelSupportsThinking(chatModel);
+      const isReasoningDisabled =
+        chatModel?.completionOptions?.reasoning === false;
+      dispatch(
+        setHasReasoningEnabled(supportsReasoning && !isReasoningDisabled),
+      );
     },
     [dispatch, hasDoneInitialConfigLoad],
   );
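
Net effect of the new initialization, summarized (not part of the commit):

// Initial reasoning-toggle state after a config (re)load:
//   chat model supports thinking, `reasoning` unset  -> enabled (the new default)
//   chat model supports thinking, `reasoning: true`  -> enabled
//   chat model supports thinking, `reasoning: false` -> disabled (explicit opt-out)
//   chat model doesn't support thinking              -> disabled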

gui/src/redux/thunks/streamNormalInput.ts

Lines changed: 9 additions & 4 deletions
@@ -121,12 +121,17 @@ export const streamNormalInput = createAsyncThunk<
     };
   }
 
-  if (state.session.hasReasoningEnabled) {
+  if (state.session.hasReasoningEnabled !== undefined) {
     completionOptions = {
       ...completionOptions,
-      reasoning: true,
-      reasoningBudgetTokens:
-        selectedChatModel.completionOptions?.reasoningBudgetTokens ?? 2048,
+      reasoning: !!state.session.hasReasoningEnabled,
+      ...(state.session.hasReasoningEnabled &&
+        selectedChatModel.underlyingProviderName !== "ollama" && {
+          // Ollama doesn't support limiting reasoning tokens at this point
+          reasoningBudgetTokens:
+            selectedChatModel.completionOptions?.reasoningBudgetTokens ??
+            2048,
+        }),
     };
   }
 
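
The completionOptions this produces in the common cases (illustrative values):

// Toggle on, non-Ollama model:  { ...completionOptions, reasoning: true, reasoningBudgetTokens: 2048 } // or the configured budget
// Toggle on, Ollama model:      { ...completionOptions, reasoning: true } // no budget — Ollama can't cap thinking tokens yet
// Toggle off:                   { ...completionOptions, reasoning: false }
// Toggle never set (undefined): completionOptions left unchanged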
