vertex ai anthropic thinking param support (BerriAI#8853)

krrishdholakia · web-flow · commit 88eedb22b967 · 2025-02-26T21:37:18.000-08:00
* fix(vertex_llm_base.py): handle credentials passed in as dictionary

* fix(router.py): support vertex credentials as json dict

* test(test_vertex.py): allows easier testing

mock anthropic thinking response for vertex ai

* test(vertex_ai_partner_models/): don't remove "@" from model

stripping the "@..." suffix from the model name breaks anthropic cost calculation

* test: move testing

* fix: fix linting error

* fix: fix linting error

* fix(vertex_ai_partner_models/main.py): split @ for codestral model

* test: fix test

* fix: fix stripping "@" on mistral models

* fix: fix test

* test: fix test
diff --git a/.gitignore b/.gitignore
@@ -77,3 +77,5 @@ litellm/proxy/_experimental/out/404.html
 litellm/proxy/_experimental/out/model_hub.html
 .mypy_cache/*
 litellm/proxy/application.log
+tests/llm_translation/vertex_test_account.json
+tests/llm_translation/test_vertex_key.json
diff --git a/litellm/llms/vertex_ai/batches/handler.py b/litellm/llms/vertex_ai/batches/handler.py
@@ -10,7 +10,10 @@
 )
 from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
 from litellm.types.llms.openai import Batch, CreateBatchRequest
-from litellm.types.llms.vertex_ai import VertexAIBatchPredictionJob
+from litellm.types.llms.vertex_ai import (
+    VERTEX_CREDENTIALS_TYPES,
+    VertexAIBatchPredictionJob,
+)
 
 from .transformation import VertexAIBatchTransformation
 
@@ -25,7 +28,7 @@ def create_batch(
         _is_async: bool,
         create_batch_data: CreateBatchRequest,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
@@ -130,7 +133,7 @@ def retrieve_batch(
         _is_async: bool,
         batch_id: str,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
diff --git a/litellm/llms/vertex_ai/files/handler.py b/litellm/llms/vertex_ai/files/handler.py
@@ -9,6 +9,7 @@
 )
 from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
 from litellm.types.llms.openai import CreateFileRequest, FileObject
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 
 from .transformation import VertexAIFilesTransformation
 
@@ -34,7 +35,7 @@ async def async_create_file(
         self,
         create_file_data: CreateFileRequest,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
@@ -70,7 +71,7 @@ def create_file(
         _is_async: bool,
         create_file_data: CreateFileRequest,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
diff --git a/litellm/llms/vertex_ai/fine_tuning/handler.py b/litellm/llms/vertex_ai/fine_tuning/handler.py
@@ -13,6 +13,7 @@
 from litellm.types.fine_tuning import OpenAIFineTuningHyperparameters
 from litellm.types.llms.openai import FineTuningJobCreate
 from litellm.types.llms.vertex_ai import (
+    VERTEX_CREDENTIALS_TYPES,
     FineTuneHyperparameters,
     FineTuneJobCreate,
     FineTunesupervisedTuningSpec,
@@ -222,7 +223,7 @@ def create_fine_tuning_job(
         create_fine_tuning_job_data: FineTuningJobCreate,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         api_base: Optional[str],
         timeout: Union[float, httpx.Timeout],
         kwargs: Optional[dict] = None,
diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
@@ -40,6 +40,7 @@
     ChatCompletionUsageBlock,
 )
 from litellm.types.llms.vertex_ai import (
+    VERTEX_CREDENTIALS_TYPES,
     Candidates,
     ContentType,
     FunctionCallingConfig,
@@ -930,7 +931,7 @@ async def async_streaming(
         client: Optional[AsyncHTTPHandler] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> CustomStreamWrapper:
@@ -1018,7 +1019,7 @@ async def async_completion(
         client: Optional[AsyncHTTPHandler] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> Union[ModelResponse, CustomStreamWrapper]:
@@ -1123,7 +1124,7 @@ def completion(
         timeout: Optional[Union[float, httpx.Timeout]],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         gemini_api_key: Optional[str],
         litellm_params: dict,
         logger_fn=None,
diff --git a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py
@@ -11,6 +11,7 @@
     get_async_httpx_client,
 )
 from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 from litellm.types.utils import ImageResponse
 
 
@@ -44,7 +45,7 @@ def image_generation(
         prompt: str,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: ImageResponse,
         logging_obj: Any,
         model: Optional[
@@ -139,7 +140,7 @@ async def aimage_generation(
         prompt: str,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: litellm.ImageResponse,
         logging_obj: Any,
         model: Optional[
diff --git a/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py b/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py
@@ -9,6 +9,7 @@
 )
 from litellm.llms.openai.openai import HttpxBinaryResponseContent
 from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 
 
 class VertexInput(TypedDict, total=False):
@@ -45,7 +46,7 @@ def audio_speech(
         logging_obj,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         api_base: Optional[str],
         timeout: Union[float, httpx.Timeout],
         model: str,
diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py
@@ -160,7 +160,8 @@ def completion(
                 url=default_api_base,
             )
 
-            model = model.split("@")[0]
+            if "codestral" in model or "mistral" in model:
+                model = model.split("@")[0]
 
             if "codestral" in model and litellm_params.get("text_completion") is True:
                 optional_params["model"] = model
diff --git a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
@@ -41,7 +41,7 @@ def embedding(
         client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> EmbeddingResponse:
@@ -148,7 +148,7 @@ async def async_embedding(
         client: Optional[AsyncHTTPHandler] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
         encoding=None,
diff --git a/litellm/llms/vertex_ai/vertex_llm_base.py b/litellm/llms/vertex_ai/vertex_llm_base.py
@@ -12,6 +12,7 @@
 from litellm.litellm_core_utils.asyncify import asyncify
 from litellm.llms.base import BaseLLM
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 
 from .common_utils import _get_gemini_url, _get_vertex_url, all_gemini_url_modes
 
@@ -34,37 +35,44 @@ def get_vertex_region(self, vertex_region: Optional[str]) -> str:
         return vertex_region or "us-central1"
 
     def load_auth(
-        self, credentials: Optional[str], project_id: Optional[str]
+        self, credentials: Optional[VERTEX_CREDENTIALS_TYPES], project_id: Optional[str]
     ) -> Tuple[Any, str]:
         import google.auth as google_auth
         from google.auth import identity_pool
         from google.auth.transport.requests import (
             Request,  # type: ignore[import-untyped]
         )
 
-        if credentials is not None and isinstance(credentials, str):
+        if credentials is not None:
             import google.oauth2.service_account
 
-            verbose_logger.debug(
-                "Vertex: Loading vertex credentials from %s", credentials
-            )
-            verbose_logger.debug(
-                "Vertex: checking if credentials is a valid path, os.path.exists(%s)=%s, current dir %s",
-                credentials,
-                os.path.exists(credentials),
-                os.getcwd(),
-            )
+            if isinstance(credentials, str):
+                verbose_logger.debug(
+                    "Vertex: Loading vertex credentials from %s", credentials
+                )
+                verbose_logger.debug(
+                    "Vertex: checking if credentials is a valid path, os.path.exists(%s)=%s, current dir %s",
+                    credentials,
+                    os.path.exists(credentials),
+                    os.getcwd(),
+                )
 
-            try:
-                if os.path.exists(credentials):
-                    json_obj = json.load(open(credentials))
-                else:
-                    json_obj = json.loads(credentials)
-            except Exception:
-                raise Exception(
-                    "Unable to load vertex credentials from environment. Got={}".format(
-                        credentials
+                try:
+                    if os.path.exists(credentials):
+                        json_obj = json.load(open(credentials))
+                    else:
+                        json_obj = json.loads(credentials)
+                except Exception:
+                    raise Exception(
+                        "Unable to load vertex credentials from environment. Got={}".format(
+                            credentials
+                        )
                     )
+            elif isinstance(credentials, dict):
+                json_obj = credentials
+            else:
+                raise ValueError(
+                    "Invalid credentials type: {}".format(type(credentials))
                 )
 
             # Check if the JSON object contains Workload Identity Federation configuration
@@ -109,7 +117,7 @@ def refresh_auth(self, credentials: Any) -> None:
 
     def _ensure_access_token(
         self,
-        credentials: Optional[str],
+        credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         project_id: Optional[str],
         custom_llm_provider: Literal[
             "vertex_ai", "vertex_ai_beta", "gemini"
@@ -202,7 +210,7 @@ def _get_token_and_url(
         gemini_api_key: Optional[str],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         stream: Optional[bool],
         custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
         api_base: Optional[str],
@@ -253,7 +261,7 @@ def _get_token_and_url(
 
     async def _ensure_access_token_async(
         self,
-        credentials: Optional[str],
+        credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         project_id: Optional[str],
         custom_llm_provider: Literal[
             "vertex_ai", "vertex_ai_beta", "gemini"
diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py b/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py
@@ -6,6 +6,7 @@
 from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import (
     VertexPassThroughCredentials,
 )
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 
 
 class VertexPassThroughRouter:
@@ -58,7 +59,7 @@ def add_vertex_credentials(
         self,
         project_id: str,
         location: str,
-        vertex_credentials: str,
+        vertex_credentials: VERTEX_CREDENTIALS_TYPES,
     ):
         """
         Add the vertex credentials for the given project-id, location
diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py
@@ -481,3 +481,6 @@ class VertexBatchPredictionResponse(TypedDict, total=False):
     createTime: str
     updateTime: str
     modelVersionId: str
+
+
+VERTEX_CREDENTIALS_TYPES = Union[str, Dict[str, str]]
diff --git a/litellm/types/passthrough_endpoints/vertex_ai.py b/litellm/types/passthrough_endpoints/vertex_ai.py
@@ -6,6 +6,8 @@
 
 from pydantic import BaseModel
 
+from ..llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
+
 
 class VertexPassThroughCredentials(BaseModel):
     # Example: vertex_project = "my-project-123"
@@ -15,4 +17,4 @@ class VertexPassThroughCredentials(BaseModel):
     vertex_location: Optional[str] = None
 
     # Example: vertex_credentials = "/path/to/credentials.json" or "os.environ/GOOGLE_CREDS"
-    vertex_credentials: Optional[str] = None
+    vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None
diff --git a/litellm/types/router.py b/litellm/types/router.py
@@ -18,6 +18,7 @@
 from ..exceptions import RateLimitError
 from .completion import CompletionRequest
 from .embedding import EmbeddingRequest
+from .llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 from .utils import ModelResponse, ProviderSpecificModelInfo
 
 
@@ -171,7 +172,7 @@ class GenericLiteLLMParams(BaseModel):
     ## VERTEX AI ##
     vertex_project: Optional[str] = None
     vertex_location: Optional[str] = None
-    vertex_credentials: Optional[str] = None
+    vertex_credentials: Optional[Union[str, dict]] = None
     ## AWS BEDROCK / SAGEMAKER ##
     aws_access_key_id: Optional[str] = None
     aws_secret_access_key: Optional[str] = None
@@ -213,7 +214,7 @@ def __init__(
         ## VERTEX AI ##
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         ## AWS BEDROCK / SAGEMAKER ##
         aws_access_key_id: Optional[str] = None,
         aws_secret_access_key: Optional[str] = None,
diff --git a/tests/local_testing/test_amazing_vertex_completion.py b/tests/local_testing/test_amazing_vertex_completion.py

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -160,7 +160,8 @@ def completion(` |
| `160` | `160` | `url=default_api_base,` |
| `161` | `161` | `)` |
| `162` | `162` | |
| `163` | | `- model = model.split("@")[0]` |
| | `163` | `+ if "codestral" in model or "mistral" in model:` |
| | `164` | `+ model = model.split("@")[0]` |
| `164` | `165` | |
| `165` | `166` | `if "codestral" in model and litellm_params.get("text_completion") is True:` |
| `166` | `167` | `optional_params["model"] = model` |