# Python: Add LiteLLM integration for chat and responses clients (#194) #1845

base: main · Changes from all commits
## New file: MIT license (+21 lines)

```text
MIT License

Copyright (c) Microsoft Corporation.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE
```
## New file: package README (+22 lines)

````markdown
# Get Started with Microsoft Agent Framework and LiteLLM

Please install this package via pip:

```bash
pip install agent-framework-litellm --pre
```

and see the [README](https://github.com/microsoft/agent-framework/tree/main/python/README.md) for more information.

# Configuration

LiteLLM AF clients are configured similarly to the plain LiteLLM clients (see https://docs.litellm.ai/docs/ for more information).

As with other clients, configuration can be provided either on class instantiation or via environment variables:

- Set `api_key` and `api_base` for your provider, or follow LiteLLM's per-provider environment-variable instructions (settings vary by provider). See https://docs.litellm.ai/docs/ for more information.
- `model_id` (param) or `LITE_LLM_MODEL_ID` (env var): the provider and model used for chat and responses clients (e.g. `azure_ai/gpt-4o-mini`).

# Development Notes

* LiteLLM doesn't have a generic way of setting the model id via configuration (see https://docs.litellm.ai/docs/set_keys for the available environment variables). For this reason, we've added the `LITE_LLM_MODEL` env var, which may be set. The integration still supports LiteLLM's list of provider-specific environment variables.
````

> **Review suggestion:** the Development Notes refer to a `LITE_LLM_MODEL` env var; the suggested change renames it to `LITE_LLM_MODEL_ID` to match the Configuration section.
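Putting the README's configuration bullets together, an environment-variable setup for an Azure AI model might look like the following sketch. `LITE_LLM_MODEL_ID` comes from the README above; the provider-specific variable names follow LiteLLM's per-provider conventions and should be checked against the LiteLLM docs for your provider, and the values here are placeholders.

```shell
# Model id read by the AF LiteLLM clients (from the README above).
export LITE_LLM_MODEL_ID="azure_ai/gpt-4o-mini"

# Provider-specific credentials, per LiteLLM's env-var conventions
# (assumed names -- verify against https://docs.litellm.ai/docs/ for your provider).
export AZURE_AI_API_KEY="<your-key>"
export AZURE_AI_API_BASE="https://example.invalid"
```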
## New file: package `__init__.py` (+17 lines)

```python
# Copyright (c) Microsoft. All rights reserved.

import importlib.metadata

from ._chat_client import LiteLlmChatClient
from ._responses_client import LiteLlmResponsesClient

try:
    __version__ = importlib.metadata.version(__name__)
except importlib.metadata.PackageNotFoundError:
    __version__ = "0.0.0"  # Fallback for development mode

__all__ = [
    "LiteLlmChatClient",
    "LiteLlmResponsesClient",
    "__version__",
]
```
## New file: chat client module (+178 lines)

```python
# Copyright (c) Microsoft. All rights reserved.

from collections.abc import AsyncIterable, MutableSequence
from typing import Any, ClassVar, cast

from agent_framework import (
    get_logger,
    use_chat_middleware,
    use_function_invocation,
)
from agent_framework._pydantic import AFBaseSettings
from agent_framework._types import ChatMessage, ChatOptions, ChatResponse, ChatResponseUpdate
from agent_framework.exceptions import ServiceInitializationError, ServiceResponseException
from agent_framework.observability import use_observability
from agent_framework.openai._chat_client import OpenAIBaseChatClient
from litellm import CustomStreamWrapper, ModelResponse, ModelResponseStream, completion  # type: ignore
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from pydantic import ValidationError


class LiteLlmCompletionAISettings(AFBaseSettings):
    """LiteLLM AI Completion settings.

    The settings are first loaded from environment variables with the prefix 'LITE_LLM_'.
    If the environment variables are not found, the settings can be loaded from a .env file
    with the encoding 'utf-8'. If the settings are not found in the .env file, the settings
    are ignored; however, validation will fail, alerting that the settings are missing.

    Keyword Args:
        model_id: The model to use, including both the provider and model (e.g., "azure_openai/gpt-4").
        env_file_path: If provided, the .env settings are read from this file path location.
        env_file_encoding: The encoding of the .env file, defaults to 'utf-8'.

    Examples:
        .. code-block:: python

            from agent_framework_litellm import LiteLlmCompletionAISettings

            # Using environment variables
            # Set LITE_LLM_MODEL_ID=azure_openai/gpt-4
            settings = LiteLlmCompletionAISettings()

            # Or passing parameters directly
            settings = LiteLlmCompletionAISettings(model_id="azure_openai/gpt-4")

            # Or loading from a .env file
            settings = LiteLlmCompletionAISettings(env_file_path="path/to/.env")
    """

    env_prefix: ClassVar[str] = "LITE_LLM_"

    model_id: str | None = None


logger = get_logger("agent_framework.litellm")
```
```python
@use_function_invocation
@use_observability
@use_chat_middleware
class LiteLlmChatClient(OpenAIBaseChatClient):
    """LiteLLM Chat client.

    This client is used to interact with LiteLLM models via the Agent Framework. Note that LiteLLM is not fully
    OpenAI API compatible, so some features may not be supported. However, LiteLLM follows the OpenAI API structure
    closely enough to allow for basic interactions.
    """

    OTEL_PROVIDER_NAME: ClassVar[str] = "litellm"  # type: ignore[reportIncompatibleVariableOverride, misc]

    def __init__(
        self,
        *,
        api_key: str | None = None,
        api_base: str | None = None,
        env_file_path: str | None = None,
        env_file_encoding: str | None = "utf-8",
        model_id: str | None = None,
        **kwargs: Any,
    ) -> None:
        self.api_key = api_key
        self.api_base = api_base

        try:
            lite_llm_settings = LiteLlmCompletionAISettings(
                model_id=model_id,
                env_file_path=env_file_path,
                env_file_encoding=env_file_encoding,
            )
        except ValidationError as ex:
            raise ServiceInitializationError("Failed to create LiteLLM settings.", ex) from ex

        if lite_llm_settings.model_id is None:
            raise ServiceInitializationError(
                "model_id parameter, or LITE_LLM_MODEL_ID env variable must be provided for LiteLlmChatClient"
            )

        self.model_id = str(lite_llm_settings.model_id)

        super().__init__(api_base=api_base, api_key=api_key, model_id=self.model_id, client=None, **kwargs)  # type: ignore
```
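The three `use_*` decorators stack middleware onto the class. As a rough illustration of class-decorator stacking (this is a toy, not the framework's actual decorators), each decorator receives the class, wraps a method, and returns the class:

```python
def use_logging(cls):
    """Hypothetical middleware decorator: wraps cls.send with a logging step."""
    original = cls.send

    def send(self, msg):
        print(f"[log] {msg}")       # middleware behavior added around the call
        return original(self, msg)  # delegate to the original method

    cls.send = send
    return cls


@use_logging
class EchoClient:
    def send(self, msg):
        return msg.upper()


print(EchoClient().send("hi"))  # prints "[log] hi", then "HI"
```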
```python
    def make_completion_streaming_request(self, **options_dict: Any) -> CustomStreamWrapper:
        options_dict["model"] = options_dict.pop("model_id")
        lite_llm_response = completion(stream=True, **options_dict)
        # Completion conditionally returns this depending on streaming vs non-streaming
        return cast(CustomStreamWrapper, lite_llm_response)
```

> **Member:** these should probably be internal methods.

> **Member:** litellm supports async, we should use that: https://github.com/BerriAI/litellm?tab=readme-ov-file#async-docs
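Until the client adopts LiteLLM's native async API (as the review comment suggests), a blocking call can at least be offloaded so it does not stall the event loop. A stdlib-only sketch, where `blocking_completion` is a stand-in for a synchronous `completion()` call:

```python
import asyncio


def blocking_completion(prompt: str) -> str:
    # Stand-in for a synchronous, potentially slow completion() call.
    return f"echo: {prompt}"


async def main() -> str:
    # asyncio.to_thread runs the blocking function in a worker thread,
    # keeping the event loop free for other tasks.
    return await asyncio.to_thread(blocking_completion, "hi")


print(asyncio.run(main()))  # → echo: hi
```

Swapping in LiteLLM's own async entry point would avoid the thread hop entirely.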
```python
    def make_completion_nonstreaming_request(self, **options_dict: Any) -> ModelResponse:
        options_dict["model"] = options_dict.pop("model_id")
        lite_llm_response = completion(stream=False, **options_dict)
        # Completion conditionally returns this depending on streaming vs non-streaming
        return cast(ModelResponse, lite_llm_response)
```
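The `cast` calls above exist because `completion`'s return type depends on the `stream` flag. One way to model such a conditional return is `typing.overload` with `Literal` flags; a minimal sketch (`fake_completion` is a stand-in, not LiteLLM's API):

```python
from collections.abc import Iterator
from typing import Literal, overload


@overload
def fake_completion(*, stream: Literal[True]) -> Iterator[str]: ...
@overload
def fake_completion(*, stream: Literal[False]) -> str: ...
def fake_completion(*, stream: bool):
    # Stand-in for a call whose return type depends on the stream flag.
    return iter(["a", "b"]) if stream else "ab"


# With overloads, the type checker narrows each call site, so no cast is needed.
chunks = fake_completion(stream=True)
print("".join(chunks))  # → ab
```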
```python
    def lite_llm_to_openai_completion(self, response: ModelResponse) -> ChatCompletion:
        # Convert a LiteLLM ResponsesAPIResponse to an OpenAI Response. LiteLLM aims to match the OpenAI API,
        # however in the future there may be differences that need to be accounted for here.
        return cast(ChatCompletion, response)
```

> **Copilot (Oct 31, 2025):** The method `lite_llm_to_openai_completion` appears to be unused. Line 153 calls `lite_llm_to_openai_response` instead, which has the same implementation. Consider removing this duplicate method or clarifying if it's intentionally kept for future use.
> **Review suggestion (Member):** make the event converter internal: rename `lite_llm_event_to_openai_event(self, event: ModelResponseStream) -> ChatCompletionChunk` to `_lite_llm_event_to_openai_event`.

> **Copilot (Oct 31, 2025):** Corrected spelling of 'providet' to 'provide': "# LiteLLM does not provide this as a first-class field, so we map it here."

> **Review suggestion (Member):** make the response converter internal: rename `lite_llm_to_openai_response(self, response: ModelResponse) -> ChatCompletion` to `_lite_llm_to_openai_response`.

> **Copilot (Oct 31, 2025):** Corrected spelling of 'compatable' to 'compatible': "# LiteLLM implements the OpenAI API via its own classes, compatible via …"

> **Member:** this should already be done in the prepare_options, and people should be able to set the model at runtime.

> **Copilot:** Corrected spelling of 'vairiables' to 'variables'.