Skip to content

Commit b5c32c9

Browse files
authored
Merge pull request BerriAI#9331 from BerriAI/litellm_patch_disable_spend_updates
[Patch] - Allow disabling all spend updates / writes to DB
2 parents d42f84d + 0c1d008 commit b5c32c9

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

litellm/proxy/hooks/proxy_track_cost_callback.py

+6
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
1414
from litellm.proxy._types import UserAPIKeyAuth
1515
from litellm.proxy.auth.auth_checks import log_db_metrics
16+
from litellm.proxy.utils import ProxyUpdateSpend
1617
from litellm.types.utils import (
1718
StandardLoggingPayload,
1819
StandardLoggingUserAPIKeyMetadata,
@@ -230,6 +231,11 @@ def _should_track_cost_callback(
230231
"""
231232
Determine if the cost callback should be tracked based on the kwargs
232233
"""
234+
235+
# don't run track cost callback if user opted into disabling spend
236+
if ProxyUpdateSpend.disable_spend_updates() is True:
237+
return False
238+
233239
if (
234240
user_api_key is not None
235241
or user_id is not None

litellm/proxy/proxy_server.py

+9-1
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
get_origin,
2424
get_type_hints,
2525
)
26+
2627
from litellm.types.utils import (
2728
ModelResponse,
2829
ModelResponseStream,
@@ -254,6 +255,7 @@ def generate_feedback_box():
254255
from litellm.proxy.utils import (
255256
PrismaClient,
256257
ProxyLogging,
258+
ProxyUpdateSpend,
257259
_cache_user_row,
258260
_get_docs_url,
259261
_get_projected_spend_over_limit,
@@ -924,6 +926,8 @@ async def update_database( # noqa: PLR0915
924926
verbose_proxy_logger.debug(
925927
f"Enters prisma db call, response_cost: {response_cost}, token: {token}; user_id: {user_id}; team_id: {team_id}"
926928
)
929+
if ProxyUpdateSpend.disable_spend_updates() is True:
930+
return
927931
if token is not None and isinstance(token, str) and token.startswith("sk-"):
928932
hashed_token = hash_token(token=token)
929933
else:
@@ -3047,7 +3051,11 @@ async def async_data_generator(
30473051
):
30483052
verbose_proxy_logger.debug("inside generator")
30493053
try:
3050-
async for chunk in proxy_logging_obj.async_post_call_streaming_iterator_hook(user_api_key_dict=user_api_key_dict, response=response, request_data=request_data):
3054+
async for chunk in proxy_logging_obj.async_post_call_streaming_iterator_hook(
3055+
user_api_key_dict=user_api_key_dict,
3056+
response=response,
3057+
request_data=request_data,
3058+
):
30513059
verbose_proxy_logger.debug(
30523060
"async_data_generator: received streaming chunk - {}".format(chunk)
30533061
)

litellm/proxy/utils.py

+15-1
Original file line number | Diff line number | Diff line change
@@ -969,7 +969,9 @@ async def post_call_success_hook(
969969

970970
async def async_post_call_streaming_hook(
971971
self,
972-
response: Union[ModelResponse, EmbeddingResponse, ImageResponse],
972+
response: Union[
973+
ModelResponse, EmbeddingResponse, ImageResponse, ModelResponseStream
974+
],
973975
user_api_key_dict: UserAPIKeyAuth,
974976
):
975977
"""
@@ -2474,6 +2476,18 @@ async def update_spend_logs(
24742476
e=e, start_time=start_time, proxy_logging_obj=proxy_logging_obj
24752477
)
24762478

2479+
@staticmethod
def disable_spend_updates() -> bool:
    """
    Return True when spend tracking is disabled via proxy settings.

    Reads the ``disable_spend_updates`` flag from the proxy's
    ``general_settings``. When it is True, callers skip writing spend
    logs and key/team/user spend updates to the DB.
    """
    # Imported lazily: proxy_server itself imports from this module,
    # so a top-level import would create a circular import.
    from litellm.proxy.proxy_server import general_settings

    # `is True` guards against truthy-but-invalid config values
    # (e.g. the string "true"); only a real boolean True disables updates.
    return general_settings.get("disable_spend_updates") is True
2490+
24772491

24782492
async def update_spend( # noqa: PLR0915
24792493
prisma_client: PrismaClient,

0 commit comments

Comments
 (0)