diff --git a/NEWS b/NEWS index cb7e2975..3e9ca674 100644 --- a/NEWS +++ b/NEWS @@ -7,8 +7,33 @@ - Add support for ~gemini-2.5-pro~, ~gemini-2.5-flash~, ~gemini-2.5-flash-lite-preview-06-17~. +- Add support for Open WebUI. Open WebUI provides an + OpenAI-compatible API, so the "support" is just a new section of the + README with instructions. + ** New features and UI changes +- When including a file in the context, the abbreviated full path of + the file is now included instead of the basename. + Specifically, =/home/user/path/to/file= is included as + =~/path/to/file=. This is to provide additional context for LLM + actions, including tool-use in subsequent conversation turns. This + applies to context included via ~gptel-add~ or as a link in a + buffer. + +- Structured output support: ~gptel-request~ can now take an optional + schema argument to constrain LLM output to the specified JSON + schema. The JSON schema can be provided as a serialized JSON string + or as an elisp object (a nested plist). This feature works with all major + backends: OpenAI, Anthropic, Gemini, llama-cpp and Ollama. It is + presently supported by some but not all "OpenAI-compatible API" + providers. Note that this is only available via the ~gptel-request~ + API, and currently unsupported by ~gptel-send~. + +- From the dry-run inspector buffer, you can now copy the Curl command + for the request. Like when continuing the query, the request is + constructed from the contents of the buffer, which is editable. + - gptel now handles Ollama models that return both reasoning content and tool calls in a single request. 
diff --git a/README.org b/README.org index 5cc6c78a..349cdaae 100644 --- a/README.org +++ b/README.org @@ -14,6 +14,7 @@ gptel is a simple Large Language Model chat client for Emacs, with support for m | Anthropic (Claude) | ✓ | [[https://www.anthropic.com/api][API key]] | | Gemini | ✓ | [[https://makersuite.google.com/app/apikey][API key]] | | Ollama | ✓ | [[https://ollama.ai/][Ollama running locally]] | +| Open WebUI | ✓ | [[https://openwebui.com/][Open WebUI running locally]] | | Llama.cpp | ✓ | [[https://github.com/ggml-org/llama.cpp/tree/master/tools/server#quick-start][Llama.cpp running locally]] | | Llamafile | ✓ | [[https://github.com/Mozilla-Ocho/llamafile#quickstart][Local Llamafile server]] | | GPT4All | ✓ | [[https://gpt4all.io/index.html][GPT4All running locally]] | @@ -24,6 +25,7 @@ gptel is a simple Large Language Model chat client for Emacs, with support for m | Mistral Le Chat | ✓ | [[https://console.mistral.ai/api-keys][API key]] | | Perplexity | ✓ | [[https://docs.perplexity.ai/docs/getting-started][API key]] | | OpenRouter | ✓ | [[https://openrouter.ai/keys][API key]] | +| AI/ML API | ✓ | [[https://aimlapi.com/app/?utm_source=gptel&utm_medium=github&utm_campaign=integration][API key]] | | together.ai | ✓ | [[https://api.together.xyz/settings/api-keys][API key]] | | Anyscale | ✓ | [[https://docs.endpoints.anyscale.com/][API key]] | | PrivateGPT | ✓ | [[https://github.com/zylon-ai/private-gpt#-documentation][PrivateGPT running locally]] | @@ -101,6 +103,7 @@ gptel uses Curl if available, but falls back to the built-in url-retrieve to wor - [[#azure][Azure]] - [[#gpt4all][GPT4All]] - [[#ollama][Ollama]] + - [[#open-webui][Open WebUI]] - [[#gemini][Gemini]] - [[#llamacpp-or-llamafile][Llama.cpp or Llamafile]] - [[#kagi-fastgpt--summarizer][Kagi (FastGPT & Summarizer)]] @@ -118,6 +121,7 @@ gptel uses Curl if available, but falls back to the built-in url-retrieve to wor - [[#github-models][Github Models]] - [[#novita-ai][Novita AI]] - [[#xai][xAI]] + 
- [[#aiml-api][AI/ML API]] - [[#github-copilotchat][Github CopilotChat]] - [[#aws-bedrock][AWS Bedrock]] - [[#usage][Usage]] @@ -333,6 +337,62 @@ The above code makes the backend available to select. If you want it to be the #+html: +#+html:
+**** Open WebUI +#+html: + +[[https://openwebui.com/][Open WebUI]] is an open source, self-hosted system which provides a multi-user web chat interface and an API endpoint for accessing LLMs, especially LLMs running locally on inference servers like Ollama. + +Because it presents an OpenAI-compatible endpoint, you use ~gptel-make-openai~ to register it as a backend. + +For instance, you can use this form to register a backend for a local instance of Open WebUI served via http on port 3000: + +#+begin_src emacs-lisp +(gptel-make-openai "OpenWebUI" + :host "localhost:3000" + :protocol "http" + :key "KEY_FOR_ACCESSING_OPENWEBUI" + :endpoint "/api/chat/completions" + :stream t + :models '("gemma3n:latest")) +#+end_src + +Or if you are running Open WebUI on another host on your local network (~box.local~), serving via https with self-signed certificates, this will work: + +#+begin_src emacs-lisp +(gptel-make-openai "OpenWebUI" + :host "box.local" + :curl-args '("--insecure") ; needed for self-signed certs + :key "KEY_FOR_ACCESSING_OPENWEBUI" + :endpoint "/api/chat/completions" + :stream t + :models '("gemma3n:latest")) +#+end_src + +To find your API key in Open WebUI, click the user name in the bottom left, Settings, Account, and then Show by API Keys section. + +Refer to the documentation of =gptel-make-openai= for more configuration options. + +You can pick this backend from the menu when using gptel (see [[#usage][Usage]]) + +***** (Optional) Set as the default gptel backend + +The above code makes the backend available to select. If you want it to be the default backend for gptel, you can set this as the value of =gptel-backend=. Use this instead of the above. 
+#+begin_src emacs-lisp +;; OPTIONAL configuration +(setq + gptel-model "gemma3n:latest" + gptel-backend (gptel-make-openai "OpenWebUI" + :host "localhost:3000" + :protocol "http" + :key "KEY_FOR_ACCESSING_OPENWEBUI" + :endpoint "/api/chat/completions" + :stream t + :models '("gemma3n:latest"))) +#+end_src + +#+html:
+ #+html:
**** Gemini #+html: @@ -924,6 +984,41 @@ The above code makes the backend available to select. If you want it to be the #+html:
+#+html:
+**** AI/ML API +#+html: + +AI/ML API provides 300+ AI models including Deepseek, Gemini, ChatGPT. The models run at enterprise-grade rate limits and uptimes. + +Register a backend with +#+begin_src emacs-lisp +;; AI/ML API offers an OpenAI compatible API +(gptel-make-openai "AI/ML API" ;Any name you want + :host "api.aimlapi.com" + :endpoint "/v1/chat/completions" + :stream t + :key "your-api-key" ;can be a function that returns the key + :models '(deepseek-chat gemini-pro gpt-4o)) +#+end_src + +You can pick this backend from the menu when using gptel (see [[#usage][Usage]]). + +***** (Optional) Set as the default gptel backend + +The above code makes the backend available to select. If you want it to be the default backend for gptel, you can set this as the value of =gptel-backend=. Use this instead of the above. +#+begin_src emacs-lisp +;; OPTIONAL configuration +(setq gptel-model 'gpt-4o + gptel-backend + (gptel-make-openai "AI/ML API" + :host "api.aimlapi.com" + :endpoint "/v1/chat/completions" + :stream t + :key "your-api-key" + :models '(deepseek-chat gemini-pro gpt-4o))) +#+end_src + +#+html:
#+html:
**** Github CopilotChat #+html: @@ -1526,13 +1621,15 @@ Other Emacs clients for LLMs prescribe the format of the interaction (a comint s all) #+end_src -|----------------------+--------------------------------------------------------------------| -| *Connection options* | | -|----------------------+--------------------------------------------------------------------| -| =gptel-use-curl= | Use Curl (default), fallback to Emacs' built-in =url=. | -| =gptel-proxy= | Proxy server for requests, passed to curl via =--proxy=. | -| =gptel-api-key= | Variable/function that returns the API key for the active backend. | -|----------------------+--------------------------------------------------------------------| +|-------------------------+--------------------------------------------------------------------| +| *Connection options* | | +|-------------------------+--------------------------------------------------------------------| +| =gptel-use-curl= | Use Curl? (default), fallback to Emacs' built-in =url=. | +| | You can also specify the Curl path here. | +| =gptel-proxy= | Proxy server for requests, passed to curl via =--proxy=. | +| =gptel-curl-extra-args= | Extra arguments passed to Curl. | +| =gptel-api-key= | Variable/function that returns the API key for the active backend. | +|-------------------------+--------------------------------------------------------------------| |-----------------------+---------------------------------------------------------| | *LLM request options* | /(Note: not supported uniformly across LLMs)/ | @@ -1677,10 +1774,17 @@ There are several more: [[https://github.com/iwahbe/chat.el][chat.el]], [[https: gptel is a general-purpose package for chat and ad-hoc LLM interaction. The following packages use gptel to provide additional or specialized functionality: - [[https://github.com/karthink/gptel-quick][gptel-quick]]: Quickly look up the region or text at point. 
+- [[https://github.com/jwiegley/gptel-prompts][gptel-prompts]]: System prompt manager for gptel. +- [[https://github.com/dolmens/gptel-aibo/][gptel-aibo]]: A writing assistant system built on top of gptel. +- [[https://github.com/kmontag/macher][Macher]]: Project-aware multi-file LLM editing for Emacs, based on gptel. - [[https://github.com/daedsidog/evedel][Evedel]]: Instructed LLM Programmer/Assistant. - [[https://github.com/lanceberge/elysium][Elysium]]: Automatically apply AI-generated changes as you code. +- [[https://github.com/jwiegley/ob-gptel][ob-gptel]]: Org-babel backend for running gptel queries. +- [[https://github.com/JDNdeveloper/gptel-autocomplete][gptel-autocomplete]]: Inline completions using gptel. - [[https://github.com/kamushadenes/ai-blog.el][ai-blog.el]]: Streamline generation of blog posts in Hugo. -- [[https://github.com/douo/magit-gptcommit][magit-gptcommit]]: Generate Commit Messages within magit-status Buffer using gptel. +- [[https://github.com/lakkiy/gptel-commit][gptel-commit]]: Generate commit messages using gptel. +- [[https://github.com/douo/magit-gptcommit][magit-gptcommit]]: Generate commit messages within magit-status Buffer using gptel. +- [[https://github.com/ragnard/gptel-magit/][gptel-magit]]: Generate commit messages for magit using gptel. - [[https://github.com/armindarvish/consult-omni][consult-omni]]: Versatile multi-source search package. It includes gptel as one of its many sources. - [[https://github.com/ultronozm/ai-org-chat.el][ai-org-chat]]: Provides branching conversations in Org buffers using gptel. (Note that gptel includes this feature as well (see =gptel-org-branching-context=), but requires a recent version of Org mode 9.7 or later to be installed.) - [[https://github.com/rob137/Corsair][Corsair]]: Helps gather text to populate LLM prompts for gptel. 
diff --git a/gptel-anthropic.el b/gptel-anthropic.el index 48714dc0..181e79f3 100644 --- a/gptel-anthropic.el +++ b/gptel-anthropic.el @@ -230,6 +230,13 @@ Mutate state INFO with response metadata." (gptel--model-capable-p 'cache)) (nconc (aref tools-array (1- (length tools-array))) '(:cache_control (:type "ephemeral"))))))) + (when gptel--schema + (plist-put prompts-plist :tools + (vconcat + (list (gptel--parse-schema backend gptel--schema)) + (plist-get prompts-plist :tools))) + (plist-put prompts-plist :tool_choice + `(:type "tool" :name ,gptel--ersatz-json-tool))) ;; Merge request params with model and backend params. (gptel--merge-plists prompts-plist @@ -237,6 +244,15 @@ Mutate state INFO with response metadata." (gptel-backend-request-params gptel-backend) (gptel--model-request-params gptel-model)))) +(cl-defmethod gptel--parse-schema ((_backend gptel-anthropic) schema) + ;; Unlike the other backends, Anthropic generates JSON using a tool call. We + ;; write the tool here, meant to be added to :tools. + (list + :name "response_json" + :description "Record JSON output according to user prompt" + :input_schema (gptel--preprocess-schema + (gptel--dispatch-schema-type schema)))) + (cl-defmethod gptel--parse-tools ((_backend gptel-anthropic) tools) "Parse TOOLS to the Anthropic API tool definition spec. diff --git a/gptel-curl.el b/gptel-curl.el index 018ff07a..8873c0bc 100644 --- a/gptel-curl.el +++ b/gptel-curl.el @@ -112,8 +112,6 @@ REQUEST-DATA is the data to send, TOKEN is a unique identifier." collect (format "-H%s: %s" key val)) (list url)))) -;;TODO: The :transformer argument here is an alternate implementation of -;;`gptel-response-filter-functions'. The two need to be unified. ;;;###autoload (defun gptel-curl-get-response (fsm) "Fetch response to prompt in state FSM from the LLM using Curl. @@ -283,6 +281,9 @@ Optional RAW disables text properties and transformation." 
(set-marker-insertion-type tracking-marker t) (plist-put info :tracking-marker tracking-marker)) (goto-char tracking-marker) + (when (plist-get info :last-was-tool-result) + (insert gptel-response-separator) + (plist-put info :last-was-tool-result nil)) (unless raw (when transformer (setq response (funcall transformer response))) @@ -297,7 +298,8 @@ Optional RAW disables text properties and transformation." (`(tool-call . ,tool-calls) (gptel--display-tool-calls tool-calls info)) (`(tool-result . ,tool-results) - (gptel--display-tool-results tool-results info)))) + (gptel--display-tool-results tool-results info) + (plist-put info :last-was-tool-result t)))) (defun gptel-curl--stream-filter (process output) (let* ((fsm (car (alist-get process gptel--request-alist))) @@ -366,7 +368,7 @@ Optional RAW disables text properties and transformation." (progn (setq response (cons 'reasoning response)) (plist-put proc-info :reasoning-block 'in)) (plist-put proc-info :reasoning-block 'done))) - ((length> response 0) + ((and (not (eq reasoning-block t)) (length> response 0)) (if-let* ((idx (string-match-p "" response))) (progn (funcall callback @@ -374,10 +376,9 @@ Optional RAW disables text properties and transformation." (string-trim-left (substring response nil (+ idx 8)))) proc-info) - ;; Signal end of reasoning stream - (funcall callback '(reasoning . t) proc-info) - (setq response (substring response (+ idx 8))) - (plist-put proc-info :reasoning-block 'done)) + (setq reasoning-block t) ;Signal end of reasoning stream + (plist-put proc-info :reasoning-block t) + (setq response (substring response (+ idx 8)))) (setq response (cons 'reasoning response))))) (when (eq reasoning-block t) ;End of reasoning block (funcall callback '(reasoning . t) proc-info) diff --git a/gptel-gemini.el b/gptel-gemini.el index f29e3232..e58863b5 100644 --- a/gptel-gemini.el +++ b/gptel-gemini.el @@ -114,15 +114,15 @@ list." 
(cl-defmethod gptel--request-data ((backend gptel-gemini) prompts) "JSON encode PROMPTS for sending to Gemini." (let ((prompts-plist - `(:contents [,@prompts] - :safetySettings [(:category "HARM_CATEGORY_HARASSMENT" - :threshold "BLOCK_NONE") - (:category "HARM_CATEGORY_SEXUALLY_EXPLICIT" - :threshold "BLOCK_NONE") - (:category "HARM_CATEGORY_DANGEROUS_CONTENT" - :threshold "BLOCK_NONE") - (:category "HARM_CATEGORY_HATE_SPEECH" - :threshold "BLOCK_NONE")])) + (list :contents (vconcat prompts) + :safetySettings [(:category "HARM_CATEGORY_HARASSMENT" + :threshold "BLOCK_NONE") + (:category "HARM_CATEGORY_SEXUALLY_EXPLICIT" + :threshold "BLOCK_NONE") + (:category "HARM_CATEGORY_DANGEROUS_CONTENT" + :threshold "BLOCK_NONE") + (:category "HARM_CATEGORY_HATE_SPEECH" + :threshold "BLOCK_NONE")])) params) (if gptel--system-message (plist-put prompts-plist :system_instruction @@ -145,6 +145,9 @@ list." (when gptel-include-reasoning (setq params (plist-put params :thinkingConfig '(:includeThoughts t)))) + (when gptel--schema + (setq params (nconc params (gptel--gemini-filter-schema + (gptel--parse-schema backend gptel--schema))))) (when params (plist-put prompts-plist :generationConfig params)) @@ -155,6 +158,11 @@ list." (gptel-backend-request-params gptel-backend) (gptel--model-request-params gptel-model)))) +(cl-defmethod gptel--parse-schema ((_backend gptel-gemini) schema) + (list :responseMimeType "application/json" + :responseSchema (gptel--preprocess-schema + (gptel--dispatch-schema-type schema)))) + (defun gptel--gemini-filter-schema (schema) "Destructively filter unsupported attributes from SCHEMA. @@ -471,7 +479,7 @@ files in the context." :output-cost 10.00 ; 15 for >200k tokens :cutoff-date "2025-01") (gemini-2.5-flash-preview-04-17 - :description "Best model in terms of price-performance, offering well-rounded capabilities" + :description "DEPRECATED: Please use gemini-2.5-flash instead." 
:capabilities (tool-use json media) :mime-types ("image/png" "image/jpeg" "image/webp" "image/heic" "image/heif" "application/pdf" "text/plain" "text/csv" "text/html") @@ -480,7 +488,7 @@ files in the context." :output-cost 0.60 ; 3.50 for thinking :cutoff-date "2025-01") (gemini-2.5-flash-preview-05-20 - :description "Best model in terms of price-performance, offering well-rounded capabilities" + :description "DEPRECATED: Please use gemini-2.5-flash instead." :capabilities (tool-use json media) :mime-types ("image/png" "image/jpeg" "image/webp" "image/heic" "image/heif" "application/pdf" "text/plain" "text/csv" "text/html") @@ -488,6 +496,17 @@ files in the context." :input-cost 0.15 :output-cost 0.60 ; 3.50 for thinking :cutoff-date "2025-01") + (gemini-2.5-flash + :description "Best model in terms of price-performance, offering well-rounded capabilities" + :capabilities (tool-use json media audio video) + :mime-types ("image/png" "image/jpeg" "image/webp" "image/heic" "image/heif" + "application/pdf" "text/plain" "text/csv" "text/html" + "audio/mpeg" "audio/wav" "audio/ogg" "audio/flac" "audio/aac" "audio/mp3" + "video/mp4" "video/mpeg" "video/avi" "video/quicktime" "video/webm") + :context-window 1024 + :input-cost 0.15 + :output-cost 0.60 + :cutoff-date "2025-01") (gemini-2.5-pro-preview-05-06 :description "Previously most powerful thinking model with state-of-the-art performance" :capabilities (tool-use json media) @@ -525,13 +544,27 @@ files in the context." 
:output-cost 2.50 :cutoff-date "2025-01") (gemini-2.5-pro - :description "State-of-the-art thinking model, capable of reasoning over complex problems" - :capabilities (tool-use json media) + :description "Most powerful Gemini thinking model with state-of-the-art performance" + :capabilities (tool-use json media audio video) :mime-types ("image/png" "image/jpeg" "image/webp" "image/heic" "image/heif" - "application/pdf" "text/plain" "text/csv" "text/html") - :context-window 1048 ; 65536 output token limit - :input-cost 1.25 ; 2.50 for >200k tokens - :output-cost 10.00 ; 15 for >200k tokens + "application/pdf" "text/plain" "text/csv" "text/html" + "audio/mpeg" "audio/wav" "audio/ogg" "audio/flac" "audio/aac" "audio/mp3" + "video/mp4" "video/mpeg" "video/avi" "video/quicktime" "video/webm") + :context-window 1048 ; 65536 output token limit + :input-cost 1.25 ; 2.50 for >200k tokens + :output-cost 10.00 ; 15 for >200k tokens + :cutoff-date "2025-01") + (gemini-2.5-flash-lite-preview-06-17 + :description "Fastest and cheapest 2.5 model, for high-volume, latency-sensitive tasks." + :capabilities (tool-use json media audio video) + :mime-types ("image/png" "image/jpeg" "image/webp" "application/pdf" "text/plain" + "audio/x-aac" "audio/flac" "audio/mp3" "audio/m4a" "audio/mpeg" + "audio/mpga" "audio/mp4" "audio/opus" "audio/pcm" "audio/wav" "audio/webm" + "video/x-flv" "video/quicktime" "video/mpeg" "video/mp4" + "video/webm" "video/wmv" "video/3gpp") + :context-window 1048 + :input-cost 0.075 + :output-cost 0.30 :cutoff-date "2025-01") (gemini-2.0-flash-thinking-exp :description "DEPRECATED: Please use gemini-2.0-flash-thinking-exp-01-21 instead." diff --git a/gptel-ollama.el b/gptel-ollama.el index 26b4b75d..1c5a07af 100644 --- a/gptel-ollama.el +++ b/gptel-ollama.el @@ -101,7 +101,10 @@ Store response metadata in state INFO." 
(gptel--merge-plists `(:model ,(gptel--model-name gptel-model) :messages [,@prompts] - :stream ,(or gptel-stream :json-false)) + :stream ,(or gptel-stream :json-false) + ,@(and gptel--schema + `(:format ,(gptel--preprocess-schema + (gptel--dispatch-schema-type gptel--schema))))) gptel--request-params (gptel-backend-request-params gptel-backend) (gptel--model-request-params gptel-model))) diff --git a/gptel-openai-extras.el b/gptel-openai-extras.el index 9f2c8caa..f2c3d7b0 100644 --- a/gptel-openai-extras.el +++ b/gptel-openai-extras.el @@ -294,11 +294,11 @@ parameters." ;; and reset by the stream filter. (plist-put info :reasoning (concat (plist-get info :reasoning) reasoning)) - (when-let* ((content (plist-get delta :content)) - ((not (eq content :null)))) - (if (eq (plist-get info :reasoning-block) 'in) ;Check if in reasoning block - (plist-put info :reasoning-block t) ;End of streaming reasoning block - (plist-put info :reasoning-block 'done)) ;Not using a reasoning model + ;; Done with reasoning if we get non-empty content + (when-let* (((plist-member info :reasoning)) ;Is this a reasoning model? + (content (plist-get delta :content)) ;Started receiving text content? 
+ ((not (or (eq content :null) (string-empty-p content))))) + (plist-put info :reasoning-block t) ;Signal end of reasoning block (throw 'done t))))))))))) (cl-defmethod gptel--parse-response :before ((_backend gptel-deepseek) response info) diff --git a/gptel-openai.el b/gptel-openai.el index 241a9b56..f4060406 100644 --- a/gptel-openai.el +++ b/gptel-openai.el @@ -40,6 +40,7 @@ (defvar gptel-track-media) (defvar gptel-use-tools) (defvar gptel-tools) +(defvar gptel--schema) (declare-function gptel-context--collect-media "gptel-context") (declare-function gptel--base64-encode "gptel") (declare-function gptel--trim-prefixes "gptel") @@ -47,6 +48,7 @@ (declare-function gptel--model-capable-p "gptel") (declare-function gptel--model-name "gptel") (declare-function gptel--get-api-key "gptel") +(declare-function gptel--insert-file-string "gptel") (declare-function prop-match-value "text-property-search") (declare-function text-property-search-backward "text-property-search") (declare-function json-read "json") @@ -57,6 +59,7 @@ (declare-function gptel-context--wrap "gptel-context") (declare-function gptel--inject-prompt "gptel") (declare-function gptel--parse-tools "gptel") +(declare-function gptel--parse-schema "gptel") ;; JSON conversion semantics used by gptel ;; empty object "{}" => empty list '() == nil @@ -216,7 +219,9 @@ information if the stream contains it." ;; No text content, so look for tool calls (when-let* ((tool-call (map-nested-elt delta '(:tool_calls 0))) (func (plist-get tool-call :function))) - (if (plist-get func :name) ;new tool block begins + (if (and (plist-get func :name) + ;; TEMP: This check is for litellm compatibility, should be removed + (not (equal (plist-get func :name) "null"))) ; new tool block begins (progn (when-let* ((partial (plist-get info :partial_json))) (let* ((prev-tool-call (car (plist-get info :tool-use))) @@ -306,6 +311,9 @@ Mutate state INFO with response metadata." 
(plist-put prompts-plist (if reasoning-model-p :max_completion_tokens :max_tokens) gptel-max-tokens)) + (when gptel--schema + (plist-put prompts-plist + :response_format (gptel--parse-schema backend gptel--schema))) ;; Merge request params with model and backend params. (gptel--merge-plists prompts-plist @@ -313,6 +321,14 @@ Mutate state INFO with response metadata." (gptel-backend-request-params gptel-backend) (gptel--model-request-params gptel-model)))) +(cl-defmethod gptel--parse-schema ((_backend gptel-openai) schema) + (list :type "json_schema" + :json_schema + (list :name (md5 (format "%s" (random))) + :schema (gptel--preprocess-schema + (gptel--dispatch-schema-type schema)) + :strict t))) + ;; NOTE: No `gptel--parse-tools' method required for gptel-openai, since this is ;; handled by its defgeneric implementation diff --git a/gptel-org.el b/gptel-org.el index caa870b6..19bc62e9 100644 --- a/gptel-org.el +++ b/gptel-org.el @@ -171,8 +171,8 @@ adding elements to this list can significantly slow down (defun gptel-org-set-topic (topic) "Set a TOPIC and limit this conversation to the current heading. -This limits the context sent to the LLM to the text between the -current heading and the cursor position." +This limits the context sent to the LLM to the text between the current +heading (i.e. the heading with the topic set) and the cursor position." (interactive (list (progn @@ -248,6 +248,8 @@ depend on the value of `gptel-org-branching-context', which see." (gptel-org--unescape-tool-results) (gptel-org--strip-block-headers) (when gptel-org-ignore-elements (gptel-org--strip-elements)) + (setq org-complex-heading-regexp ;For org-element-context to run + (buffer-local-value 'org-complex-heading-regexp org-buf)) (current-buffer)))) ;; Create prompt the usual way (let ((org-buf (current-buffer)) @@ -256,6 +258,8 @@ depend on the value of `gptel-org-branching-context', which see." 
(gptel-org--unescape-tool-results) (gptel-org--strip-block-headers) (when gptel-org-ignore-elements (gptel-org--strip-elements)) + (setq org-complex-heading-regexp ;For org-element-context to run + (buffer-local-value 'org-complex-heading-regexp org-buf)) (current-buffer)))))) (defun gptel-org--strip-elements () diff --git a/gptel-rewrite.el b/gptel-rewrite.el index 0ed0819c..240cd19f 100644 --- a/gptel-rewrite.el +++ b/gptel-rewrite.el @@ -28,9 +28,12 @@ (defvar eldoc-documentation-functions) (defvar diff-entire-buffers) +(defvar ediff-window-setup-function) +(defvar ediff-split-window-function) (declare-function diff-no-select "diff") (declare-function rmc--add-key-description "rmc") +(declare-function ediff-setup-windows-plain "ediff-wind") ;; * User options @@ -310,6 +313,7 @@ BUF is the buffer to modify, defaults to the overlay buffer." (when-let* ((overlay-buffer ov)) (let ((disp (overlay-get ov 'display)) (stored (overlay-get ov 'gptel--ediff))) + (overlay-put ov 'face (and restore 'gptel-rewrite-highlight-face)) (overlay-put ov 'display (and restore stored)) (overlay-put ov 'gptel--ediff (unless restore disp))))))) (gptel--ediff-restore @@ -320,7 +324,45 @@ BUF is the buffer to modify, defaults to the overlay buffer." (remove-hook 'ediff-quit-hook gptel--ediff-restore)))) (funcall hideshow) (add-hook 'ediff-quit-hook gptel--ediff-restore) - (ediff-buffers ov-buf newbuf)))) + (let ((ediff-window-setup-function #'ediff-setup-windows-plain) + (ediff-split-window-function #'split-window-horizontally)) + (ediff-buffers ov-buf newbuf))))) + +(defun gptel--rewrite-merge-git (beg end new-str) + "Produce a merge conflict region between BEG and END. + +Merge the region with NEW-STR using git merge-file." + (let ((original-temp-file (make-temp-file "gptel-merge-")) + (empty-temp-file (make-temp-file "gptel-merge-")) ; use /dev/null? (windows?) 
+ (new-temp-file (make-temp-file "gptel-merge-"))) + (unwind-protect + (progn (write-region beg end original-temp-file) + (with-temp-file empty-temp-file (insert "")) + (with-temp-file new-temp-file (insert new-str)) + (goto-char beg) + (delete-region beg end) + (call-process + "git" nil (list (current-buffer) nil) nil + "merge-file" "--no-diff3" "-L" "original" "-L" "Empty" "-L" + (gptel-backend-name gptel-backend) "-p" + original-temp-file empty-temp-file new-temp-file) + ;; Make merge marker active if required + (goto-char beg) (unless (bolp) (insert "\n"))) + (delete-file original-temp-file) + (delete-file empty-temp-file) + (delete-file new-temp-file)))) + +(defun gptel--rewrite-merge-simple (beg end new-str) + "Produce a merge conflict region between BEG and END. + +NEW-STR is the new string intended to replace the region." + (goto-char end) ;End first to preserve ordering + (unless (bolp) (insert "\n")) + (insert "=======\n" new-str "\n>>>>>>> " + (gptel-backend-name gptel-backend) "\n") + (goto-char beg) + (unless (bolp) (insert "\n")) + (insert-before-markers "<<<<<<< original\n")) (defun gptel--rewrite-merge (&optional ovs) "Insert pending LLM responses in OVS as merge conflicts." @@ -332,15 +374,9 @@ BUF is the buffer to modify, defaults to the overlay buffer." 
(dolist (ov (ensure-list ovs)) (save-excursion (when-let* ((new-str (overlay-get ov 'gptel-rewrite))) - ;; Insert merge - (goto-char (overlay-start ov)) - (unless (bolp) (insert "\n")) - (insert-before-markers "<<<<<<< original\n") - (goto-char (overlay-end ov)) - (unless (bolp) (insert "\n")) - (insert - "=======\n" new-str - "\n>>>>>>> " (gptel-backend-name gptel-backend) "\n") + (if (executable-find "git") ;Replace overlay content with merge result + (gptel--rewrite-merge-git (overlay-start ov) (overlay-end ov) new-str) + (gptel--rewrite-merge-simple (overlay-start ov) (overlay-end ov) new-str)) (setq changed t)))) (when changed (smerge-mode 1))) (gptel--rewrite-reject ovs)))) diff --git a/gptel-transient.el b/gptel-transient.el index 3d49bff1..5b0da5b8 100644 --- a/gptel-transient.el +++ b/gptel-transient.el @@ -793,7 +793,26 @@ Also format its value in the Transient menu." (gptel--inspect-query (gptel--suffix-send (cons "I" (transient-args transient-current-command))) - 'json)))]] + 'json)))] + ["Logging" + :if (lambda () (or gptel-log-level gptel-expert-commands)) + ("-l" "Log level" "-l" + :class gptel-lisp-variable + :variable gptel-log-level + :set-value gptel--set-with-scope + :display-nil "Off" + :prompt "Log level: " + :reader + (lambda (prompt _ _) + "Manage gptel's logging." 
+ (let ((state (completing-read + prompt '("off" "info" "debug") nil t))) + (message "Log level set to %s" state) + (if (string= state "off") nil (intern state))))) + ("L" "Inspect Log" + (lambda () (interactive) + (pop-to-buffer (get-buffer-create gptel--log-buffer-name))) + :format " %k %d")]] [(gptel--suffix-send)] (interactive) (gptel--sanitize-model) diff --git a/gptel.el b/gptel.el index 4f70524e..24a1a086 100644 --- a/gptel.el +++ b/gptel.el @@ -35,7 +35,7 @@ ;; gptel supports: ;; ;; - The services ChatGPT, Azure, Gemini, Anthropic AI, Together.ai, Perplexity, -;; Anyscale, OpenRouter, Groq, PrivateGPT, DeepSeek, Cerebras, Github Models, +;; AI/ML API, Anyscale, OpenRouter, Groq, PrivateGPT, DeepSeek, Cerebras, Github Models, ;; GitHub Copilot chat, AWS Bedrock, Novita AI, xAI, Sambanova, Mistral Le ;; Chat and Kagi (FastGPT & Summarizer). ;; - Local models via Ollama, Llama.cpp, Llamafiles or GPT4All @@ -73,7 +73,7 @@ ;; - For Gemini: define a gptel-backend with `gptel-make-gemini', which see. ;; - For Anthropic (Claude): define a gptel-backend with `gptel-make-anthropic', ;; which see. -;; - For Together.ai, Anyscale, Groq, OpenRouter, DeepSeek, Cerebras or +;; - For AI/ML API, Together.ai, Anyscale, Groq, OpenRouter, DeepSeek, Cerebras or ;; Github Models: define a gptel-backend with `gptel-make-openai', which see. ;; - For PrivateGPT: define a backend with `gptel-make-privategpt', which see. ;; - For Perplexity: define a backend with `gptel-make-perplexity', which see. @@ -914,6 +914,11 @@ These parameters are combined with model-specific and backend-specific incompatible with the active backend can break gptel. Do not use this variable unless you know what you're doing!") +(defconst gptel--ersatz-json-tool "response_json" + "Name of ersatz tool used to force JSON output. 
+ +Some APIs, like Anthropic, use a tool to produce structured JSON output.") + ;;; Utility functions @@ -991,7 +996,7 @@ Later plists in the sequence take precedence over earlier ones." (defun gptel--insert-file-string (path) "Insert at point the contents of the file at PATH as context." - (insert (format "In file `%s`:" (file-name-nondirectory path)) + (insert (format "In file `%s`:" (abbreviate-file-name path)) "\n\n```\n") (let ((pm (point-marker))) (set-marker-insertion-type pm t) @@ -1082,6 +1087,7 @@ Compatibility macro for Emacs 27.1." "Copy gptel's local variables from BUF to a temp buffer and run BODY. If positions START and END are provided, insert that part of BUF first." + (declare (indent 3)) `(gptel--with-buffer-copy-internal ,buf ,start ,end (lambda () ,@body))) (defun gptel--with-buffer-copy-internal (buf start end body-thunk) @@ -1092,7 +1098,7 @@ For BUF, START, END and BODY-THUNK see `gptel--with-buffer-copy'." (with-current-buffer temp-buffer (dolist (sym '( gptel-backend gptel--system-message gptel-model gptel-mode gptel-track-response gptel-track-media - gptel-use-tools gptel-tools gptel-use-curl + gptel-use-tools gptel-tools gptel-use-curl gptel--schema gptel-use-context gptel--num-messages-to-send gptel-stream gptel-include-reasoning gptel--request-params gptel-temperature gptel-max-tokens gptel-cache)) @@ -1576,6 +1582,60 @@ file." (declare-function gptel-context--wrap "gptel-context") + +;;; Structured output +(defvar gptel--schema nil + "Response output schema for backends that support it.") + +(cl-defgeneric gptel--parse-schema (_backend _schema) + "Parse JSON schema in a backend-appropriate way.") + +(defun gptel--dispatch-schema-type (schema) + "Convert SCHEMA to a valid elisp representation." + (when (stringp schema) + (setq schema (gptel--json-read-string schema))) + ;; The OpenAI and Anthropic APIs don't allow arrays at the root of the schema. + ;; Work around this by wrapping it in an object with the field "items". 
+ ;; TODO(schema): Find some way to strip this extra layer from the response. + (if (member (plist-get schema :type) '("array" array)) + (list :type "object" + :properties (list :items schema) + :required ["items"] + :additionalProperties :json-false) + schema)) + +(defun gptel--preprocess-schema (spec) + "Set additionalProperties for objects in SPEC destructively. + +Convert symbol :types to strings." + ;; NOTE: Do not use `sequencep' here, as that covers strings too and breaks + ;; things. + (when (or (listp spec) (vectorp spec)) + (cond + ((vectorp spec) + (cl-loop for element across spec + for idx upfrom 0 + do (aset spec idx (gptel--preprocess-schema element)))) + ((keywordp (car spec)) + (let ((tail spec)) + (while tail + (when (eq (car tail) :type) + (when (symbolp (cadr tail)) ;Convert symbol :type to string + (setcar (cdr tail) (symbol-name (cadr tail)))) + (when (equal (cadr tail) "object") ;Add additional object fields + (plist-put tail :additionalProperties :json-false) + (let ((props + (cl-loop for prop in (plist-get tail :properties) by #'cddr + collect (substring (symbol-name prop) 1)))) + (plist-put tail :required (vconcat props))))) + (when (or (listp (cadr tail)) (vectorp (cadr tail))) + (gptel--preprocess-schema (cadr tail))) + (setq tail (cddr tail))))) + ((listp spec) (dolist (element spec) + (when (listp element) + (gptel--preprocess-schema element)))))) + spec) + ;;; Tool use @@ -1867,7 +1927,7 @@ implementation, used by OpenAI-compatible APIs and Ollama." (plist-get arg :name))) (gptel-tool-args tool)))) :additionalProperties :json-false)) - (list :parameters :null))))) + (list :parameters (list :type "object" :properties nil)))))) (ensure-list tools)))) (cl-defgeneric gptel--parse-tool-results (backend results) @@ -2242,7 +2302,12 @@ Run post-response hooks." 
                       (cons 'tool-result result-alist) info)
                     (gptel--fsm-transition fsm)))))
          (if (null tool-spec)
-             (message "Unknown tool called by model: %s" name)
+             (if (equal name gptel--ersatz-json-tool) ;Could be a JSON response
+                 ;; Handle structured JSON output supplied as tool call
+                 (funcall (plist-get info :callback)
+                          (gptel--json-encode (plist-get tool-call :args))
+                          info)
+               (message "Unknown tool called by model: %s" name))
            (setq arg-values
                  (mapcar
                   (lambda (arg)
@@ -2277,11 +2342,9 @@ Run post-response hooks."
 
 (defun gptel--error-p (info) (plist-get info :error))
 
-(defun gptel--tool-use-p (info)
-  (and (plist-get info :tools) (plist-get info :tool-use)))
+(defun gptel--tool-use-p (info) (plist-get info :tool-use))
 
-(defun gptel--tool-result-p (info)
-  (and (plist-get info :tools) (plist-get info :tool-success)))
+(defun gptel--tool-result-p (info) (plist-get info :tool-success))
 
 ;; TODO(prompt-list): Document new prompt input format to `gptel-request'.
@@ -2292,7 +2355,7 @@ Run post-response hooks."
                       position context dry-run (stream nil) (in-place nil)
                       (system gptel--system-message)
-                      transforms (fsm (gptel-make-fsm)))
+                      schema transforms (fsm (gptel-make-fsm)))
   "Request a response from the `gptel-backend' for PROMPT.
 
 The request is asynchronous, this function returns immediately.
@@ -2442,6 +2505,13 @@ additional information (such as from a RAG engine).
 and the state machine.  It should run the callback after finishing its
 transformation.
 
+If provided, SCHEMA forces the LLM to generate JSON output.  Its value
+is a JSON schema, provided either as a serialized JSON string or as an
+elisp object (a nested plist).  See the manual or the wiki for examples.
+
+Note: SCHEMA is presently experimental and subject to change, and not
+all providers support structured output.
+
 See `gptel-prompt-transform-functions' for more.
 
 FSM is the state machine driving the request.  This can be used
@@ -2470,6 +2540,7 @@ be used to rerun or continue the request at a later time."
((markerp position) position) ((integerp position) (set-marker (make-marker) position buffer)))) + (gptel--schema schema) (prompt-buffer (cond ;prompt from buffer or explicitly supplied ((null prompt) @@ -2702,6 +2773,8 @@ JSON query instead of the Lisp structure gptel uses." (make-composed-keymap (define-keymap "C-c C-c" #'gptel--continue-query + "C-c C-w" (lambda () "Copy Curl command for query." + (interactive) (gptel--continue-query 'copy)) "C-c C-k" #'quit-window) (current-local-map))) (unless header-line-format @@ -2710,17 +2783,24 @@ JSON query instead of the Lisp structure gptel uses." (concat "Edit request: \\[read-only-mode]," " Send request: \\[gptel--continue-query]," + (format " Copy Curl: %s" + (propertize "C-c C-w" 'face 'help-key-binding)) " Quit: \\[quit-window]")))) (display-buffer (current-buffer) gptel-display-buffer-action))))) -(defun gptel--continue-query () +(declare-function gptel-curl--get-args "gptel-curl") + +(defun gptel--continue-query (&optional copy) "Continue sending the gptel query displayed in this buffer. The request is continued with the same parameters as originally -specified." - (interactive nil lisp-data-mode fundamental-mode) +specified. + +With prefix arg COPY, copy the Curl command for the request to the +kill ring instead." + (interactive "P" lisp-data-mode fundamental-mode) (unless (equal (buffer-name) "*gptel-query*") - (user-error "This command is meant for use in a gptel dry-run buffer.")) + (user-error "This command is meant for use in a gptel dry-run buffer")) (save-excursion (goto-char (point-min)) (condition-case-unless-debug nil @@ -2729,8 +2809,16 @@ specified." 
(gptel--json-read)))) (cl-assert (cl-typep gptel--fsm-last 'gptel-fsm)) (plist-put (gptel-fsm-info gptel--fsm-last) :data data) - (gptel--fsm-transition gptel--fsm-last) ;INIT -> WAIT - (quit-window)) + (if copy ;Copy Curl command instead of sending request + (let ((args (and (require 'gptel-curl) + (gptel-curl--get-args (gptel-fsm-info gptel--fsm-last) + (md5 (format "%s" (random))))))) + (kill-new + (mapconcat #'shell-quote-argument + (cons (gptel--curl-path) args) " \\\n")) + (message "Curl command for request copied to kill-ring")) + (gptel--fsm-transition gptel--fsm-last) ;INIT -> WAIT + (quit-window))) (error (user-error "Can not resume request: could not read data from buffer!"))))) @@ -3670,7 +3758,7 @@ PRESET is a spec (plist) of keys and values." (while index (setq key (pop index) val (pop index)) (pcase key - (:description) + ((or :description :pre :post)) (:parents (mapc (lambda (parent-preset) (nconc syms (gptel--preset-syms diff --git a/test b/test index 3e644187..f33daff4 160000 --- a/test +++ b/test @@ -1 +1 @@ -Subproject commit 3e6441877dff02e71fb3538120094d994c5bd7df +Subproject commit f33daff49c5ebbf991473906e2c83f0a867e0d8d