
Commit c153d7b

feat: enable reasoning toggle for ollama models
Signed-off-by: Fred Bricon <[email protected]>
1 parent 80c6797 commit c153d7b

File tree

- core/llm/autodetect.ts
- core/llm/llms/Ollama.ts
- docs/reference.mdx
- docs/reference/json-reference.mdx
- extensions/vscode/config_schema.json
- gui/src/components/mainInput/InputToolbar.tsx
- gui/src/hooks/ParallelListeners.tsx
- gui/src/redux/thunks/streamNormalInput.ts

8 files changed: +56 -21 lines changed

core/llm/autodetect.ts

Lines changed: 27 additions & 1 deletion
@@ -1,4 +1,9 @@
-import { ChatMessage, ModelCapability, TemplateType } from "../index.js";
+import {
+  ChatMessage,
+  ModelCapability,
+  ModelDescription,
+  TemplateType,
+} from "../index.js";
 import { NEXT_EDIT_MODELS } from "./constants.js";
 
 import {
@@ -126,6 +131,26 @@ function modelSupportsImages(
 
   return false;
 }
+
+function modelSupportsThinking(
+  model: ModelDescription | null | undefined,
+): boolean {
+  if (!model) {
+    return false;
+  }
+  if ("anthropic" === model.underlyingProviderName) {
+    return true;
+  }
+  if (model.model.includes("deepseek-r")) {
+    return true;
+  }
+  if (model.completionOptions?.reasoning) {
+    // Reasoning support is forced at the config level. Model might not necessarily support it though!
+    return true;
+  }
+  return false;
+}
+
 const PARALLEL_PROVIDERS: string[] = [
   "anthropic",
   "bedrock",
@@ -421,4 +446,5 @@ export {
   llmCanGenerateInParallel,
   modelSupportsImages,
   modelSupportsNextEdit,
+  modelSupportsThinking,
 };
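
For context, this is roughly how the new helper behaves. A minimal sketch — the model descriptions below are invented, trimmed to just the fields the helper reads, and cast for brevity; they are not real config entries:

import { ModelDescription } from "core";
import { modelSupportsThinking } from "core/llm/autodetect";

// Hypothetical models, for illustration only.
const claude = { underlyingProviderName: "anthropic", model: "claude-3-7-sonnet-latest" } as ModelDescription;
const r1 = { underlyingProviderName: "ollama", model: "deepseek-r1:8b" } as ModelDescription;
const forced = {
  underlyingProviderName: "ollama",
  model: "qwen3:8b",
  completionOptions: { reasoning: true },
} as ModelDescription;
const llama = { underlyingProviderName: "ollama", model: "llama3.1:8b" } as ModelDescription;

modelSupportsThinking(claude);    // true — any Anthropic model
modelSupportsThinking(r1);        // true — model name contains "deepseek-r"
modelSupportsThinking(forced);    // true — reasoning forced at the config level
modelSupportsThinking(llama);     // false — no signal that the model can think
modelSupportsThinking(undefined); // false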

core/llm/llms/Ollama.ts

Lines changed: 2 additions & 1 deletion
@@ -87,6 +87,7 @@ interface OllamaChatOptions extends OllamaBaseOptions {
   tools?: OllamaTool[]; // the tools of the chat, this can be used to keep a tool memory
   // Not supported yet - tools: tools for the model to use if supported. Requires stream to be set to false
   // And correspondingly, tool calls in OllamaChatMessage
+  think?: boolean; // if true the model will be prompted to think about the response before generating it
 }
 
 type OllamaBaseResponse = {
@@ -146,7 +147,6 @@ class Ollama extends BaseLLM implements ModelInstaller {
   private static modelsBeingInstalledMutex = new Mutex();
 
   private fimSupported: boolean = false;
-
   constructor(options: LLMOptions) {
     super(options);
 
@@ -393,6 +393,7 @@ class Ollama extends BaseLLM implements ModelInstaller {
       model: this._getModel(),
       messages: ollamaMessages,
       options: this._getModelFileParams(options),
+      think: options.reasoning,
      keep_alive: options.keepAlive ?? 60 * 30, // 30 minutes
      stream: options.stream,
      // format: options.format, // Not currently in base completion options
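
With the toggle on, the request body POSTed to Ollama's /api/chat endpoint would look roughly like this (a sketch: the model name and message are invented, and `think` only takes effect on an Ollama version and model that support thinking):

POST /api/chat
{
  "model": "deepseek-r1:8b",
  "messages": [{ "role": "user", "content": "Why is the sky blue?" }],
  "options": {},
  "think": true,
  "keep_alive": 1800,
  "stream": true
}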

docs/reference.mdx

Lines changed: 1 addition & 1 deletion
@@ -219,7 +219,7 @@ The `models` section defines the language models used in your configuration. Mod
 - `topP`: The cumulative probability for nucleus sampling.
 - `topK`: Maximum number of tokens considered at each step.
 - `stop`: An array of stop tokens that will terminate the completion.
-- `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ models.
+- `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
 - `reasoningBudgetTokens`: Budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
 
 - `requestOptions`: HTTP request options specific to the model.
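
For example, opting an Ollama model in from config.yaml might look like this (a sketch — the model is illustrative, and `reasoning` is assumed to live under `defaultCompletionOptions` alongside the other options above):

models:
  - name: DeepSeek R1
    provider: ollama
    model: deepseek-r1:8b
    defaultCompletionOptions:
      reasoning: true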

docs/reference/json-reference.mdx

Lines changed: 1 addition & 5 deletions
@@ -191,7 +191,7 @@ Parameters that control the behavior of text generation and completion settings.
 - `keepAlive`: For Ollama, this parameter sets the number of seconds to keep the model loaded after the last request, unloading it from memory if inactive (default: `1800` seconds, or 30 minutes).
 - `numGpu`: For Ollama, this parameter overrides the number of gpu layers that will be used to load the model into VRAM.
 - `useMmap`: For Ollama, this parameter allows the model to be mapped into memory. If disabled can enhance response time on low end devices but will slow down the stream.
-- `reasoning`: Enables thinking/reasoning for Anthropic Claude 3.7+ models.
+- `reasoning`: Enables thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
 - `reasoningBudgetTokens`: Sets budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
 
 Example
@@ -485,8 +485,6 @@ Several experimental config parameters are available, as described below:
 - `applyCodeBlock`: Model title for applying code blocks.
 - `repoMapFileSelection`: Model title for repo map selections.
 
-
-
 - `modelContextProtocolServers`: See [Model Context Protocol](/customize/deep-dives/mcp)
 
 Example
@@ -540,5 +538,3 @@ Some deprecated `config.json` settings are no longer stored in config and have b
 - `codeWrap`
 - `displayRawMarkdown`
 - `showChatScrollbar`
-
-
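
The config.json equivalent might look like this (a hypothetical entry; the title and model name are invented):

{
  "models": [
    {
      "title": "DeepSeek R1 (Ollama)",
      "provider": "ollama",
      "model": "deepseek-r1:8b",
      "completionOptions": { "reasoning": true }
    }
  ]
}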

extensions/vscode/config_schema.json

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@
     },
     "reasoning": {
       "title": "Reasoning",
-      "description": "Enable thinking/reasoning for Anthropic Claude 3.7+ models",
+      "description": "Enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models",
       "type": "boolean"
     },
     "reasoningBudgetTokens": {

gui/src/components/mainInput/InputToolbar.tsx

Lines changed: 7 additions & 2 deletions
@@ -5,7 +5,10 @@ import {
 } from "@heroicons/react/24/outline";
 import { LightBulbIcon as LightBulbIconSolid } from "@heroicons/react/24/solid";
 import { InputModifiers } from "core";
-import { modelSupportsImages } from "core/llm/autodetect";
+import {
+  modelSupportsImages,
+  modelSupportsThinking,
+} from "core/llm/autodetect";
 import { useContext, useRef } from "react";
 import { IdeMessengerContext } from "../../context/IdeMessenger";
 import { useAppDispatch, useAppSelector } from "../../redux/hooks";
@@ -66,6 +69,8 @@ function InputToolbar(props: InputToolbarProps) {
     defaultModel.capabilities,
   );
 
+  const supportsThinking = modelSupportsThinking(defaultModel);
+
   const smallFont = useFontSize(-2);
   const tinyFont = useFontSize(-3);
 
@@ -139,7 +144,7 @@ function InputToolbar(props: InputToolbarProps) {
           </ToolTip>
         </HoverItem>
       )}
-      {defaultModel?.underlyingProviderName === "anthropic" && (
+      {supportsThinking && (
        <HoverItem
          onClick={() =>
            dispatch(setHasReasoningEnabled(!hasReasoningEnabled))

gui/src/hooks/ParallelListeners.tsx

Lines changed: 8 additions & 6 deletions
@@ -19,6 +19,7 @@ import {
 } from "../redux/slices/sessionSlice";
 import { setTTSActive } from "../redux/slices/uiSlice";
 
+import { modelSupportsThinking } from "core/llm/autodetect";
 import { cancelStream } from "../redux/thunks/cancelStream";
 import { handleApplyStateUpdate } from "../redux/thunks/handleApplyStateUpdate";
 import { refreshSessionMetadata } from "../redux/thunks/session";
@@ -79,12 +80,13 @@ function ParallelListeners() {
         document.body.style.fontSize = `${configResult.config.ui.fontSize}px`;
       }
 
-      if (
-        configResult.config?.selectedModelByRole.chat?.completionOptions
-          ?.reasoning
-      ) {
-        dispatch(setHasReasoningEnabled(true));
-      }
+      const chatModel = configResult.config?.selectedModelByRole.chat;
+      const supportsReasoning = modelSupportsThinking(chatModel);
+      const isReasoningDisabled =
+        chatModel?.completionOptions?.reasoning === false;
+      dispatch(
+        setHasReasoningEnabled(supportsReasoning && !isReasoningDisabled),
+      );
     },
     [dispatch, hasDoneInitialConfigLoad],
   );
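
Net effect of the new initialization, summarized (not part of the commit):

// Initial reasoning-toggle state after a config (re)load:
//   chat model supports thinking, `reasoning` unset  -> enabled (the new default)
//   chat model supports thinking, `reasoning: true`  -> enabled
//   chat model supports thinking, `reasoning: false` -> disabled (explicit opt-out)
//   chat model doesn't support thinking              -> disabled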

gui/src/redux/thunks/streamNormalInput.ts

Lines changed: 9 additions & 4 deletions
@@ -121,12 +121,17 @@ export const streamNormalInput = createAsyncThunk<
     };
   }
 
-  if (state.session.hasReasoningEnabled) {
+  if (state.session.hasReasoningEnabled !== undefined) {
     completionOptions = {
       ...completionOptions,
-      reasoning: true,
-      reasoningBudgetTokens:
-        selectedChatModel.completionOptions?.reasoningBudgetTokens ?? 2048,
+      reasoning: !!state.session.hasReasoningEnabled,
+      ...(state.session.hasReasoningEnabled &&
+        selectedChatModel.underlyingProviderName !== "ollama" && {
+          // Ollama doesn't support limiting reasoning tokens at this point
+          reasoningBudgetTokens:
+            selectedChatModel.completionOptions?.reasoningBudgetTokens ??
+            2048,
+        }),
     };
   }
 
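
The completionOptions this produces in the common cases (illustrative values):

// Toggle on, non-Ollama model:  { ...completionOptions, reasoning: true, reasoningBudgetTokens: 2048 } // or the configured budget
// Toggle on, Ollama model:      { ...completionOptions, reasoning: true } // no budget — Ollama can't cap thinking tokens yet
// Toggle off:                   { ...completionOptions, reasoning: false }
// Toggle never set (undefined): completionOptions left unchanged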
