Merge pull request #97 from NimbleAINinja/main

ILikeAI · web-flow · commit de009e02b7fa · 2024-11-17T20:24:04.000+08:00
diff --git a/llm_apis/anthropic_client.py b/llm_apis/anthropic_client.py
@@ -3,17 +3,27 @@
 import os
 import base64
 import httpx
-from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
+from tenacity import retry, stop_after_attempt, wait_exponential, \
+    retry_if_exception_type
 
 MAX_RETRIES = 5
 
+
 class AnthropicRateLimitError(Exception):
     """Exception raised for rate limit errors."""
     def __init__(self, message, retry_after):
         self.message = message
         self.retry_after = retry_after
         super().__init__(self.message)
 
+
+class AnthropicOverloadError(Exception):
+    """Raised when the API is overloaded (HTTP 529 / overloaded_error)."""
+    def __init__(self, message):
+        self.message = message
+        super().__init__(self.message)
+
+
 class AnthropicClient:
     def __init__(self, verbose=False):
         """Initialize the Anthropic client with the API key."""
@@ -23,16 +33,28 @@ def __init__(self, verbose=False):
     @retry(
         stop=stop_after_attempt(MAX_RETRIES),
         wait=wait_exponential(multiplier=1, min=4, max=10),
-        retry=retry_if_exception_type(AnthropicRateLimitError)
+        retry=(retry_if_exception_type(AnthropicRateLimitError) |
+               retry_if_exception_type(AnthropicOverloadError))
     )
     def _make_api_call(self, api_args):
         """Make an API call with retry mechanism."""
         try:
             return self.client.messages.create(**api_args)
         except httpx.HTTPStatusError as e:
             if e.response.status_code == 429:
-                retry_after = int(e.response.headers.get('retry-after', 60))
-                raise AnthropicRateLimitError(f"Rate limit exceeded. {str(e)}", retry_after)
+                retry_after = int(e.response.headers.get(
+                    'retry-after', 60))
+                raise AnthropicRateLimitError(
+                    f"Rate limit exceeded. {str(e)}", retry_after)
+            elif e.response.status_code == 529:
+                raise AnthropicOverloadError(
+                    f"Anthropic API overloaded: {str(e)}")
+            raise  # any other HTTP status propagates unchanged
+        except anthropic.APIStatusError as e:  # payload is e.body, not e.args
+            err = e.body.get('error') if isinstance(e.body, dict) else None
+            kind = err.get('type') if isinstance(err, dict) else None
+            if e.status_code == 529 or kind == 'overloaded_error':
+                raise AnthropicOverloadError(f"Anthropic API overloaded: {e}")
             raise
 
     def stream_completion(self, messages, model, **kwargs):
@@ -46,46 +68,41 @@ def stream_completion(self, messages, model, **kwargs):
         Yields:
             str: Text generated by the Anthropic API.
         """
-        # Extract system message if present, otherwise set to None
-        system_messages = [message['content'] for message in messages if message['role'] == 'system']
+        system_messages = [msg['content'] for msg in messages
+                           if msg['role'] == 'system']
         system_message = system_messages[0] if system_messages else None
-        
-        # Filter out system messages from the messages list
-        messages = [message for message in messages if message['role'] != 'system']
 
-        # Prepare the arguments for the Anthropic API call
+        messages = [msg for msg in messages
+                    if msg['role'] != 'system']
+
         api_args = {
             "model": model,
-            "max_tokens": kwargs.get('max_tokens', 1000),  # Default to 1000 if not provided
+            "max_tokens": kwargs.get('max_tokens', 1000),
             "stream": True,
             **kwargs
         }
-        
-        # Only include the system parameter if a system message is present
+
         if system_message:
             api_args["system"] = system_message
 
         processed_messages = []
         for message in messages:
             if 'image' in message:
-                processed_content = [
-                    {
-                        "type": "image",
-                        "source": {
-                            "type": "base64",
-                            "media_type": "image/jpeg",
-                            "data": message['image'].replace('\n', '')  # Remove newlines
-                        }
+                processed_content = [{
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/jpeg",
+                        "data": message['image'].replace('\n', '')
                     }
-                ]
-                
-                # Add original text content if present
+                }]
+
                 if 'content' in message and message['content']:
                     processed_content.append({
                         "type": "text",
                         "text": message['content']
                     })
-            
+
                 processed_messages.append({
                     "role": message['role'],
                     "content": processed_content
@@ -97,7 +114,8 @@ def stream_completion(self, messages, model, **kwargs):
                 })
 
         if not processed_messages:
-            raise ValueError(f"No messages to send to the API. Original messages: {messages}")
+            raise ValueError(
+                f"No messages to send. Original messages: {messages}")
 
         api_args["messages"] = processed_messages
 
@@ -110,18 +128,24 @@ def stream_completion(self, messages, model, **kwargs):
             if self.verbose:
                 print(f"Rate limit error: {e.message}. Retry after {e.retry_after} seconds.")
             raise
+        except AnthropicOverloadError as e:
+            if self.verbose:
+                print(f"Overload error: {e.message}")
+            raise
         except Exception as e:
             if self.verbose:
                 import traceback
                 traceback.print_exc()
             print(f"An error occurred streaming completion from Anthropic API: {e}")
-            raise RuntimeError(f"An error occurred streaming completion from Anthropic API: {e}")
+            raise RuntimeError(
+                f"An error occurred streaming completion from Anthropic API: {e}")
+
 
 # Test the AnthropicClient
 if __name__ == "__main__":
     client = AnthropicClient(verbose=True)
-    
-    #test text only   
+
+    # test text only
     messages = [
         {
             "role": "system",
@@ -138,18 +162,20 @@ def stream_completion(self, messages, model, **kwargs):
     try:
         for chunk in client.stream_completion(messages, model):
             print(chunk, end='', flush=True)
-        print()  # Add a newline at the end
+        print()
     except AnthropicRateLimitError as e:
-        print(f"\nRate limit error encountered: {e.message}. Retry after {e.retry_after} seconds.")
+        print(f"\nRate limit error: {e.message}. Retry after {e.retry_after} seconds.")
+    except AnthropicOverloadError as e:
+        print(f"\nOverload error: {e.message}")
     except Exception as e:
         print(f"\nAn error occurred: {e}")
 
-    #test multimodal
+    # test multimodal
     image_url = "https://upload.wikimedia.org/wikipedia/commons/a/a7/Camponotus_flavomarginatus_ant.jpg"
     image_media_type = "image/jpeg"
     image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
- 
-    messages=[
+
+    messages = [
         {
             "role": "system",
             "content": "Respond only in rhyming couplets."
@@ -172,13 +198,15 @@ def stream_completion(self, messages, model, **kwargs):
             ],
         }
     ]
-   
+
     print("\nMultimodal Response:")
     try:
         for chunk in client.stream_completion(messages, model):
             print(chunk, end='', flush=True)
         print()
     except AnthropicRateLimitError as e:
-        print(f"\nRate limit error encountered: {e.message}. Retry after {e.retry_after} seconds.")
+        print(f"\nRate limit error: {e.message}. Retry after {e.retry_after} seconds.")
+    except AnthropicOverloadError as e:
+        print(f"\nOverload error: {e.message}")
     except Exception as e:
         print(f"\nAn error occurred: {e}")