820 changes: 456 additions & 364 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -3,7 +3,7 @@ name = "cohere"

[tool.poetry]
name = "cohere"
version = "5.18.0"
version = "5.19.0"
description = ""
readme = "README.md"
authors = []
66 changes: 20 additions & 46 deletions src/cohere/base_client.py
@@ -268,9 +268,8 @@ def chat_stream(
Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

citation_quality : typing.Optional[ChatStreamRequestCitationQuality]
Defaults to `"accurate"`.

Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results.
Defaults to `"enabled"`.
Citations are enabled by default for models that support it, but can be turned off by setting `"type": "disabled"`.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

@@ -406,7 +405,8 @@ def chat_stream(
token="YOUR_TOKEN",
)
response = client.chat_stream(
message="hello world!",
model="command-a-03-2025",
message="hello!",
)
for chunk in response:
yield chunk
@@ -568,9 +568,8 @@ def chat(
Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

citation_quality : typing.Optional[ChatRequestCitationQuality]
Defaults to `"accurate"`.

Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results.
Defaults to `"enabled"`.
Citations are enabled by default for models that support it, but can be turned off by setting `"type": "disabled"`.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

@@ -699,27 +698,15 @@ def chat(

Examples
--------
from cohere import ChatbotMessage, ChatConnector, Client, UserMessage
from cohere import Client

client = Client(
client_name="YOUR_CLIENT_NAME",
token="YOUR_TOKEN",
)
client.chat(
chat_history=[
UserMessage(
message="Who discovered gravity?",
),
ChatbotMessage(
message="The man who is widely credited with discovering gravity is Sir Isaac Newton",
),
],
message="What year was he born?",
connectors=[
ChatConnector(
id="web-search",
)
],
model="command-a-03-2025",
message="Tell me about LLMs",
)
"""
_response = self._raw_client.chat(
@@ -1073,7 +1060,7 @@ def embed(
images : typing.Optional[typing.Sequence[str]]
An array of image data URIs for the model to embed. Maximum number of images per call is `1`.

The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and has a maximum size of 5MB.
The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.

Images are only supported with Embed v3.0 and newer models.

@@ -1724,9 +1711,8 @@ async def chat_stream(
Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

citation_quality : typing.Optional[ChatStreamRequestCitationQuality]
Defaults to `"accurate"`.

Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results.
Defaults to `"enabled"`.
Citations are enabled by default for models that support it, but can be turned off by setting `"type": "disabled"`.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

@@ -1867,7 +1853,8 @@ async def chat_stream(

async def main() -> None:
response = await client.chat_stream(
message="hello world!",
model="command-a-03-2025",
message="hello!",
)
async for chunk in response:
yield chunk
@@ -2033,9 +2020,8 @@ async def chat(
Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

citation_quality : typing.Optional[ChatRequestCitationQuality]
Defaults to `"accurate"`.

Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results.
Defaults to `"enabled"`.
Citations are enabled by default for models that support it, but can be turned off by setting `"type": "disabled"`.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

@@ -2166,7 +2152,7 @@ async def chat(
--------
import asyncio

from cohere import AsyncClient, ChatbotMessage, ChatConnector, UserMessage
from cohere import AsyncClient

client = AsyncClient(
client_name="YOUR_CLIENT_NAME",
@@ -2176,20 +2162,8 @@ async def chat(

async def main() -> None:
await client.chat(
chat_history=[
UserMessage(
message="Who discovered gravity?",
),
ChatbotMessage(
message="The man who is widely credited with discovering gravity is Sir Isaac Newton",
),
],
message="What year was he born?",
connectors=[
ChatConnector(
id="web-search",
)
],
model="command-a-03-2025",
message="Tell me about LLMs",
)


@@ -2563,7 +2537,7 @@ async def embed(
images : typing.Optional[typing.Sequence[str]]
An array of image data URIs for the model to embed. Maximum number of images per call is `1`.

The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and has a maximum size of 5MB.
The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.

Images are only supported with Embed v3.0 and newer models.

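Note on the updated examples above: the docstring samples now pass an explicit model alongside message, and the citation docs describe the "enabled"/"disabled" behaviour. A minimal end-to-end sketch of the same v1 calls, assuming a valid token, that command-a-03-2025 is available to the account, and that the streaming event names ("text-generation") follow the existing v1 SDK, which this PR does not change:

    import cohere

    # Client construction mirrors the docstring examples; client_name is optional.
    client = cohere.Client(
        client_name="YOUR_CLIENT_NAME",
        token="YOUR_TOKEN",
    )

    # Non-streaming chat with an explicit model, as in the updated example.
    reply = client.chat(
        model="command-a-03-2025",
        message="Tell me about LLMs",
    )
    print(reply.text)

    # Streaming variant; text arrives as "text-generation" events.
    for chunk in client.chat_stream(
        model="command-a-03-2025",
        message="hello!",
    ):
        if chunk.event_type == "text-generation":
            print(chunk.text, end="", flush=True)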
4 changes: 2 additions & 2 deletions src/cohere/core/client_wrapper.py
@@ -22,10 +22,10 @@ def __init__(

def get_headers(self) -> typing.Dict[str, str]:
headers: typing.Dict[str, str] = {
"User-Agent": "cohere/5.18.0",
"User-Agent": "cohere/5.19.0",
"X-Fern-Language": "Python",
"X-Fern-SDK-Name": "cohere",
"X-Fern-SDK-Version": "5.18.0",
"X-Fern-SDK-Version": "5.19.0",
}
if self._client_name is not None:
headers["X-Client-Name"] = self._client_name
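The wrapper now reports cohere/5.19.0 in both the User-Agent and X-Fern-SDK-Version headers. A quick, standard-library-only check (not part of this PR) to confirm the installed package matches the release being cut:

    from importlib.metadata import version

    # Should print "5.19.0" once this release is installed.
    print(version("cohere"))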
4 changes: 2 additions & 2 deletions src/cohere/datasets/client.py
@@ -120,7 +120,7 @@ def create(
The name of the uploaded dataset.

type : DatasetType
The dataset type, which is used to validate the data. Valid types are `embed-input`, `reranker-finetune-input`, `single-label-classification-finetune-input`, `chat-finetune-input`, and `multi-label-classification-finetune-input`.
The dataset type, which is used to validate the data. The only valid type is `embed-input` used in conjunction with the Embed Jobs API.

data : core.File
See core.File for more documentation
@@ -384,7 +384,7 @@ async def create(
The name of the uploaded dataset.

type : DatasetType
The dataset type, which is used to validate the data. Valid types are `embed-input`, `reranker-finetune-input`, `single-label-classification-finetune-input`, `chat-finetune-input`, and `multi-label-classification-finetune-input`.
The dataset type, which is used to validate the data. The only valid type is `embed-input` used in conjunction with the Embed Jobs API.

data : core.File
See core.File for more documentation
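With the docstring narrowed to embed-input, dataset creation is now documented only for use with the Embed Jobs API. A hedged sketch of that flow, assuming a local JSONL file, an embed model name available to the account, and that the datasets.create / embed_jobs.create signatures are otherwise unchanged by this PR:

    import cohere

    client = cohere.Client(token="YOUR_TOKEN")

    # "embed-input" is the only dataset type the updated docstring documents.
    with open("embed_input.jsonl", "rb") as f:  # hypothetical local file
        dataset = client.datasets.create(
            name="my-embed-dataset",
            type="embed-input",
            data=f,
        )

    # Use the uploaded dataset with the Embed Jobs API (model name is illustrative).
    job = client.embed_jobs.create(
        model="embed-english-v3.0",
        dataset_id=dataset.id,
        input_type="search_document",
    )
    print(job.job_id)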
4 changes: 2 additions & 2 deletions src/cohere/datasets/raw_client.py
@@ -265,7 +265,7 @@ def create(
The name of the uploaded dataset.

type : DatasetType
The dataset type, which is used to validate the data. Valid types are `embed-input`, `reranker-finetune-input`, `single-label-classification-finetune-input`, `chat-finetune-input`, and `multi-label-classification-finetune-input`.
The dataset type, which is used to validate the data. The only valid type is `embed-input` used in conjunction with the Embed Jobs API.

data : core.File
See core.File for more documentation
@@ -1207,7 +1207,7 @@ async def create(
The name of the uploaded dataset.

type : DatasetType
The dataset type, which is used to validate the data. Valid types are `embed-input`, `reranker-finetune-input`, `single-label-classification-finetune-input`, `chat-finetune-input`, and `multi-label-classification-finetune-input`.
The dataset type, which is used to validate the data. The only valid type is `embed-input` used in conjunction with the Embed Jobs API.

data : core.File
See core.File for more documentation
4 changes: 2 additions & 2 deletions src/cohere/models/client.py
@@ -66,7 +66,7 @@ def list(
request_options: typing.Optional[RequestOptions] = None,
) -> ListModelsResponse:
"""
Returns a list of models available for use. The list contains models from Cohere as well as your fine-tuned models.
Returns a list of models available for use.

Parameters
----------
@@ -175,7 +175,7 @@ async def list(
request_options: typing.Optional[RequestOptions] = None,
) -> ListModelsResponse:
"""
Returns a list of models available for use. The list contains models from Cohere as well as your fine-tuned models.
Returns a list of models available for use.

Parameters
----------
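The models.list description no longer mentions fine-tuned models; the call itself is unchanged. For reference, a minimal sketch of listing what the account can use:

    import cohere

    client = cohere.Client(token="YOUR_TOKEN")

    # Returns a ListModelsResponse; pagination and endpoint filters keep their
    # existing optional parameters.
    response = client.models.list()
    for model in response.models or []:
        print(model.name)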
4 changes: 2 additions & 2 deletions src/cohere/models/raw_client.py
@@ -210,7 +210,7 @@ def list(
request_options: typing.Optional[RequestOptions] = None,
) -> HttpResponse[ListModelsResponse]:
"""
Returns a list of models available for use. The list contains models from Cohere as well as your fine-tuned models.
Returns a list of models available for use.

Parameters
----------
@@ -578,7 +578,7 @@ async def list(
request_options: typing.Optional[RequestOptions] = None,
) -> AsyncHttpResponse[ListModelsResponse]:
"""
Returns a list of models available for use. The list contains models from Cohere as well as your fine-tuned models.
Returns a list of models available for use.

Parameters
----------
24 changes: 10 additions & 14 deletions src/cohere/raw_base_client.py
@@ -196,9 +196,8 @@ def chat_stream(
Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

citation_quality : typing.Optional[ChatStreamRequestCitationQuality]
Defaults to `"accurate"`.

Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results.
Defaults to `"enabled"`.
Citations are enabled by default for models that support it, but can be turned off by setting `"type": "disabled"`.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

@@ -663,9 +662,8 @@ def chat(
Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

citation_quality : typing.Optional[ChatRequestCitationQuality]
Defaults to `"accurate"`.

Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results.
Defaults to `"enabled"`.
Citations are enabled by default for models that support it, but can be turned off by setting `"type": "disabled"`.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

@@ -1607,7 +1605,7 @@ def embed(
images : typing.Optional[typing.Sequence[str]]
An array of image data URIs for the model to embed. Maximum number of images per call is `1`.

The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and has a maximum size of 5MB.
The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.

Images are only supported with Embed v3.0 and newer models.

@@ -3111,9 +3109,8 @@ async def chat_stream(
Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

citation_quality : typing.Optional[ChatStreamRequestCitationQuality]
Defaults to `"accurate"`.

Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results.
Defaults to `"enabled"`.
Citations are enabled by default for models that support it, but can be turned off by setting `"type": "disabled"`.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

@@ -3578,9 +3575,8 @@ async def chat(
Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

citation_quality : typing.Optional[ChatRequestCitationQuality]
Defaults to `"accurate"`.

Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results.
Defaults to `"enabled"`.
Citations are enabled by default for models that support it, but can be turned off by setting `"type": "disabled"`.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

@@ -4522,7 +4518,7 @@ async def embed(
images : typing.Optional[typing.Sequence[str]]
An array of image data URIs for the model to embed. Maximum number of images per call is `1`.

The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and has a maximum size of 5MB.
The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.

Images are only supported with Embed v3.0 and newer models.

5 changes: 5 additions & 0 deletions src/cohere/types/api_meta.py
@@ -14,6 +14,11 @@ class ApiMeta(UncheckedBaseModel):
api_version: typing.Optional[ApiMetaApiVersion] = None
billed_units: typing.Optional[ApiMetaBilledUnits] = None
tokens: typing.Optional[ApiMetaTokens] = None
cached_tokens: typing.Optional[float] = pydantic.Field(default=None)
"""
The number of prompt tokens that hit the inference cache.
"""

warnings: typing.Optional[typing.List[str]] = None

if IS_PYDANTIC_V2:
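ApiMeta gains an optional cached_tokens count. A sketch of reading it from a v1 chat response, assuming the deployment populates meta; the field is optional and may be None:

    import cohere

    client = cohere.Client(token="YOUR_TOKEN")

    response = client.chat(
        model="command-a-03-2025",
        message="Tell me about LLMs",
    )

    # New in this PR: prompt tokens served from the inference cache.
    if response.meta is not None and response.meta.cached_tokens is not None:
        print(f"cached prompt tokens: {response.meta.cached_tokens}")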
2 changes: 1 addition & 1 deletion src/cohere/types/chat_request_citation_quality.py
@@ -2,4 +2,4 @@

import typing

ChatRequestCitationQuality = typing.Union[typing.Literal["fast", "accurate", "off"], typing.Any]
ChatRequestCitationQuality = typing.Union[typing.Literal["ENABLED", "DISABLED", "FAST", "ACCURATE", "OFF"], typing.Any]
4 changes: 3 additions & 1 deletion src/cohere/types/chat_stream_request_citation_quality.py
@@ -2,4 +2,6 @@

import typing

ChatStreamRequestCitationQuality = typing.Union[typing.Literal["fast", "accurate", "off"], typing.Any]
ChatStreamRequestCitationQuality = typing.Union[
typing.Literal["ENABLED", "DISABLED", "FAST", "ACCURATE", "OFF"], typing.Any
]
6 changes: 2 additions & 4 deletions src/cohere/types/citation_options.py
@@ -15,10 +15,8 @@ class CitationOptions(UncheckedBaseModel):

mode: typing.Optional[CitationOptionsMode] = pydantic.Field(default=None)
"""
Defaults to `"accurate"`.
Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results.

**Note**: `command-r7b-12-2024` and `command-a-03-2025` only support `"fast"` and `"off"` modes. The default is `"fast"`.
Defaults to `"enabled"`.
Citations are enabled by default for models that support it, but can be turned off by setting `"type": "disabled"`.
"""

if IS_PYDANTIC_V2:
2 changes: 1 addition & 1 deletion src/cohere/types/citation_options_mode.py
@@ -2,4 +2,4 @@

import typing

CitationOptionsMode = typing.Union[typing.Literal["FAST", "ACCURATE", "OFF"], typing.Any]
CitationOptionsMode = typing.Union[typing.Literal["ENABLED", "DISABLED", "FAST", "ACCURATE", "OFF"], typing.Any]
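CitationOptionsMode and the v1 citation_quality literals above now accept "ENABLED"/"DISABLED" in addition to the previous values. A hedged sketch of passing the new mode through the v2 chat surface, assuming ClientV2.chat accepts documents and citation_options with the shapes used in the existing SDK, none of which this PR changes:

    import cohere

    client = cohere.ClientV2(api_key="YOUR_API_KEY")

    response = client.chat(
        model="command-a-03-2025",
        messages=[{"role": "user", "content": "What does the policy say about refunds?"}],
        documents=[{"id": "doc-1", "data": {"text": "Refunds are issued within 30 days."}}],
        # "ENABLED" is one of the new literals; citations stay on by default for
        # models that support them, so this is explicit rather than required.
        citation_options={"mode": "ENABLED"},
    )
    print(response.message.citations)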
4 changes: 4 additions & 0 deletions src/cohere/types/dataset_type.py
@@ -12,6 +12,10 @@
"single-label-classification-finetune-input",
"chat-finetune-input",
"multi-label-classification-finetune-input",
"batch-chat-input",
"batch-openai-chat-input",
"batch-embed-v2-input",
"batch-chat-v2-input",
],
typing.Any,
]
5 changes: 0 additions & 5 deletions src/cohere/types/get_model_response.py
@@ -43,11 +43,6 @@ class GetModelResponse(UncheckedBaseModel):
Public URL to the tokenizer's configuration file.
"""

supports_vision: typing.Optional[bool] = pydantic.Field(default=None)
"""
Whether the model supports image inputs or not.
"""

default_endpoints: typing.Optional[typing.List[CompatibleEndpoint]] = pydantic.Field(default=None)
"""
The API endpoints that the model is default to.
8 changes: 4 additions & 4 deletions src/cohere/types/usage.py
@@ -5,13 +5,13 @@
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2
from ..core.unchecked_base_model import UncheckedBaseModel
from .usage_billed_units import UsageBilledUnits
from .usage_tokens import UsageTokens


class Usage(UncheckedBaseModel):
billed_units: typing.Optional[UsageBilledUnits] = None
tokens: typing.Optional[UsageTokens] = None
cached_tokens: typing.Optional[float] = pydantic.Field(default=None)
"""
The number of prompt tokens that hit the inference cache.
"""

if IS_PYDANTIC_V2:
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow") # type: ignore # Pydantic v2
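The v2 Usage model gains the same optional cached_tokens field. A minimal construction sketch with an illustrative value, using the module path from this diff; the other fields keep their defaults of None:

    from cohere.types.usage import Usage

    # The new field is optional; pydantic coerces the int to the declared float type.
    usage = Usage(cached_tokens=96)
    print(usage.cached_tokens)  # 96.0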