
Commit 2d58d0e

Reapply "Merge pull request #93 from NimbleAINinja/main"
This reverts commit 0e35a1f.

10 files changed (+531 −67 lines)


actions/always_reddy_voice_assistant/main.py

+35 −11
@@ -41,28 +41,54 @@ def handle_default_assistant_response(self):
 
             if len(self.messages) > 0 and self.messages[0]["role"] == "system":
                 self.messages[0]["content"] = prompt.get_system_prompt_message(config.ACTIVE_PROMPT)
-
             if self.last_message_was_cut_off:
                 message = "--> USER CUT THE ASSISTANTS LAST MESSAGE SHORT <--\n" + message
 
-            if self.AR.clipboard_text and self.AR.clipboard_text != self.AR.last_clipboard_text:
-                message += f"\n\nTHE USER HAS GANTED YOU ACCESS TO THEIR CLIPABORD, THIS IS ITS CONTENT (ignore if user doesn't mention it):\n```{self.AR.clipboard_text}```"
+            new_message = {"role": "user", "content": message}
+
+            if hasattr(self.AR, 'clipboard_image') and self.AR.clipboard_image:
+                new_message['content'] = [
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/jpeg",
+                            "data": self.AR.clipboard_image.replace('\n', '')
+                        }
+                    },
+                    {
+                        "type": "text",
+                        "text": message + "\n\nTHE USER HAS GRANTED YOU ACCESS TO AN IMAGE FROM THEIR CLIPBOARD. ANALYZE AND BRIEFLY DESCRIBE THE IMAGE IF RELEVANT TO THE CONVERSATION."
+                    }
+                ]
+                self.AR.clipboard_image = None
+            elif self.AR.clipboard_text and self.AR.clipboard_text != self.AR.last_clipboard_text:
+                new_message['content'] += f"\n\nTHE USER HAS GRANTED YOU ACCESS TO THEIR CLIPBOARD, THIS IS ITS CONTENT (ignore if user doesn't mention it):\n```{self.AR.clipboard_text}```"
                 self.AR.last_clipboard_text = self.AR.clipboard_text
                 self.AR.clipboard_text = None
 
             if config.TIMESTAMP_MESSAGES:
-                message += f"\n\nMESSAGE TIMESTAMP:{time.strftime('%I:%M %p')} {time.strftime('%Y-%m-%d (%A)')} "
+                timestamp = f"\n\nMESSAGE TIMESTAMP:{time.strftime('%I:%M %p')} {time.strftime('%Y-%m-%d (%A)')} "
+                if isinstance(new_message['content'], list):
+                    new_message['content'][-1]['text'] += timestamp
+                else:
+                    new_message['content'] += timestamp
 
-            self.messages.append({"role": "user", "content": message})
+            self.messages.append(new_message)
 
             if self.AR.stop_action:
                 return
 
+            # Ensure there's at least one message
+            if not self.messages:
+                print("Error: No messages to send to the API.")
+                return
+
             stream = self.AR.completion_client.get_completion_stream(self.messages, config.COMPLETION_MODEL, **config.COMPLETION_PARAMS)
             response = self.AR.completion_client.process_text_stream(stream,
                                                                      marker_tuples=[(config.CLIPBOARD_TEXT_START_SEQ, config.CLIPBOARD_TEXT_END_SEQ, to_clipboard)],
-                                                                     sentence_callback=self.AR.tts.run_tts)#We pass in pairs of start and end sequences to the marker_tuples argument to indicate that the text between these sequences should be copied to the clipboard, then we pass the to_clipboard function as the callback to handle this action.
-
+                                                                     sentence_callback=self.AR.tts.run_tts)
+
             while self.AR.tts.running_tts:
                 time.sleep(0.001)
 
@@ -84,16 +110,14 @@ def handle_default_assistant_response(self):
             print("\nResponse:\n", response)
 
         except Exception as e:
+            print(f"An error occurred in handle_default_assistant_response: {e}")
             if self.AR.verbose:
                 import traceback
                 traceback.print_exc()
-            else:
-                print(f"An error occurred while handling the response: {e}")
-
 
     def new_chat(self):
         """Clear the message history and start a new chat session."""
         self.messages = prompt.build_initial_messages(config.ACTIVE_PROMPT)
         self.last_message_was_cut_off = False
         self.AR.last_clipboard_text = None
-        print("New chat session started.")
+        print("New chat session started.")

image_supported_models.json

+22
@@ -0,0 +1,22 @@
+{
+    "supported_models": [
+        "claude-3-opus-20240229",
+        "claude-3-sonnet-20240229",
+        "claude-3-haiku-20240307",
+        "claude-3-5-sonnet-20240620",
+        "claude-3-5-sonnet-20241022",
+        "gpt-4o",
+        "gpt-4o-mini",
+        "gpt-4-turbo-2024-04-09",
+        "gpt-4",
+        "pixtral-12b",
+        "llava-v1.5-13b",
+        "llava-v1.5-7b",
+        "mini-cpm-2.6",
+        "bunny-llama-3-8b",
+        "deepseek-vl",
+        "idefics2-8b",
+        "llava-llama-3-8b",
+        "meta-llama/llama-3.2-11b-vision-instruct:free"
+    ]
+}
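This excerpt doesn't show where the new JSON file is consumed, so the following is only a plausible sketch of a reader; the helper name, file path, and gating logic are assumptions, not code from the commit:

```python
import json

def model_supports_images(model_name: str, path: str = "image_supported_models.json") -> bool:
    """Hypothetical helper: check a model against the supported_models list."""
    with open(path, "r", encoding="utf-8") as f:
        supported = json.load(f)["supported_models"]
    return model_name in supported

# Example: only attach a clipboard image for image-capable models.
if model_supports_images("gpt-4o"):
    print("Model accepts image content blocks.")
```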

llm_apis/anthropic_client.py

+110 −11
@@ -1,5 +1,8 @@
 from anthropic import Anthropic
+import anthropic.types
 import os
+import base64
+import httpx
 
 class AnthropicClient:
     def __init__(self, verbose=False):
@@ -13,7 +16,7 @@ def stream_completion(self, messages, model, **kwargs):
         Args:
             messages (list): List of messages.
             model (str): Model for completion.
-            **kwargs: Additional keyword arguments.
+            **kwargs: Additional keyword arguments, including max_tokens if specified.
 
         Yields:
             str: Text generated by the Anthropic API.
@@ -29,24 +32,120 @@ def stream_completion(self, messages, model, **kwargs):
             # Prepare the arguments for the Anthropic API call
             api_args = {
                 "model": model,
-                "messages": messages,
+                "max_tokens": kwargs.get('max_tokens', 1000), # Default to 1000 if not provided
                 **kwargs
             }
 
             # Only include the system parameter if a system message is present
             if system_message:
                 api_args["system"] = system_message
 
-            # Stream the completion
-            stream = self.client.messages.stream(**api_args)
-
-            with stream as stream:
-                for text in stream.text_stream:
-                    yield text
+            processed_messages = []
+            for message in messages:
+                if 'image' in message:
+                    processed_content = [
+                        {
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": "image/jpeg",
+                                "data": message['image'].replace('\n', '') # Remove newlines
+                            }
+                        }
+                    ]
+
+                    # Add original text content if present
+                    if 'content' in message and message['content']:
+                        processed_content.append({
+                            "type": "text",
+                            "text": message['content']
+                        })
+
+                    processed_messages.append({
+                        "role": message['role'],
+                        "content": processed_content
+                    })
+                else:
+                    processed_messages.append({
+                        "role": message['role'],
+                        "content": message['content']
+                    })
+
+            if not processed_messages:
+                raise ValueError(f"No messages to send to the API. Original messages: {messages}")
+
+            api_args["messages"] = processed_messages
+
+            with self.client.messages.stream(**api_args) as stream:
+                for event in stream:
+                    if isinstance(event, anthropic.types.MessageStartEvent):
+                        continue
+                    if isinstance(event, anthropic.types.ContentBlockStartEvent):
+                        continue
+                    if isinstance(event, anthropic.types.ContentBlockDeltaEvent):
+                        yield event.delta.text
         except Exception as e:
             if self.verbose:
                 import traceback
                 traceback.print_exc()
-            else:
-                print(f"An error occurred streaming completion from Anthropic API: {e}")
-            raise RuntimeError(f"An error occurred streaming completion from Anthropic API: {e}")
+            print(f"An error occurred streaming completion from Anthropic API: {e}")
+            raise RuntimeError(f"An error occurred streaming completion from Anthropic API: {e}")
+
+
+# Test the AnthropicClient
+if __name__ == "__main__":
+    client = AnthropicClient(verbose=True)
+
+    #test text only
+    messages = [
+        {
+            "role": "system",
+            "content": "Be precise and concise."
+        },
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ]
+    model = "claude-3-5-sonnet-20240620"
+
+    print("Response:")
+    for chunk in client.stream_completion(messages, model):
+        print(chunk, end='', flush=True)
+    print() # Add a newline at the end
+
+
+    #test multimodal
+    image_url = "https://upload.wikimedia.org/wikipedia/commons/a/a7/Camponotus_flavomarginatus_ant.jpg"
+    image_media_type = "image/jpeg"
+    image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+
+    messages = [
+        {
+            "role": "system",
+            "content": "Respond only in rhyming couplets."
+        },
+        {
+            "role": "user",
+            "content": "Should I eat this?"
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": image_media_type,
+                        "data": image_data,
+                    },
+                }
+            ],
+        }
+    ]
+
+    print("Response:")
+    for chunk in client.stream_completion(messages, model):
+        print(chunk, end='', flush=True)
+    print()
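One thing this hunk relies on but doesn't show is where system_message comes from: Anthropic's Messages API takes the system prompt as a top-level system parameter rather than as a "system" role inside messages, so it is presumably split out earlier in the method. A sketch of that assumption (variable names mirror the diff; the extraction code itself is not in this excerpt):

```python
messages = [
    {"role": "system", "content": "Be precise and concise."},
    {"role": "user", "content": "What is the capital of France?"},
]

# Assumed pre-processing: lift any system entry out of the message list so it
# can be passed as api_args["system"], as the hunk above does.
system_message = None
chat_messages = []
for m in messages:
    if m["role"] == "system":
        system_message = m["content"]  # last system message wins
    else:
        chat_messages.append(m)

print(system_message)  # Be precise and concise.
print(chat_messages)   # only the user message remains
```

Filtering the stream to ContentBlockDeltaEvent then yields only the text deltas, skipping the message-start and content-block-start bookkeeping events.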

llm_apis/lm_studio_client.py

+81 −12
@@ -1,4 +1,6 @@
 from openai import OpenAI
+import base64
+import httpx
 
 class LM_StudioClient:
     """Client for interacting with LM studio using a local server and openai lib."""
@@ -11,16 +13,42 @@ def stream_completion(self, messages, model, **kwargs):
 
         Args:
             messages (list): List of messages.
-            model (str): Model for completion, this for now is always "local-model"
+            model (str): Model for completion
             **kwargs: Additional keyword arguments.
 
         Yields:
             str: Text generated.
         """
         try:
+            # Process messages to handle multimodal content
+            processed_messages = []
+            for message in messages:
+                content = []
+
+                # Handle text content
+                if isinstance(message.get('content'), str):
+                    content.append({"type": "text", "text": message['content']})
+                # Handle multimodal content
+                elif isinstance(message.get('content'), list):
+                    for item in message['content']:
+                        if item.get('type') == 'image':
+                            content.append({
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:{item['source']['media_type']};base64,{item['source']['data']}"
+                                }
+                            })
+                        else:
+                            content.append(item)
+
+                processed_messages.append({
+                    "role": message['role'],
+                    "content": content if content else message.get('content')
+                })
+
             stream = self.client.chat.completions.create(
                 model=model,
-                messages=messages,
+                messages=processed_messages,
                 stream=True,
                 **kwargs
             )
@@ -36,13 +64,54 @@ def stream_completion(self, messages, model, **kwargs):
             print(f"An error occurred streaming completion from LM studio: {e}")
             raise RuntimeError(f"An error occurred streaming completion from LM studio: {e}")
 
-# # Example usage
-# if __name__ == "__main__":
-#     client = LM_StudioClient(base_url="http://localhost:1234/v1", verbose=True)
-#     messages = [
-#         {"role": "system", "content": "Always answer in rhymes."},
-#         {"role": "user", "content": "Introduce yourself."}
-#     ]
-#     model = "local-model"
-#     for content in client.stream_completion(messages, model):
-#         print(content)
+# Test the LMStudioClient
+if __name__ == "__main__":
+    client = LM_StudioClient(verbose=True)
+
+    #test text only
+    messages = [
+        {
+            "role": "system",
+            "content": "Be precise and concise."
+        },
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ]
+    model = "your_model_name_here" # Replace with your actual model name
+
+    print("\nText-only Response:")
+    for chunk in client.stream_completion(messages, model):
+        print(chunk, end='', flush=True)
+    print() # Add a newline at the end
+
+
+    #test multimodal
+    image_url = "https://upload.wikimedia.org/wikipedia/commons/a/a7/Camponotus_flavomarginatus_ant.jpg"
+    image_media_type = "image/jpeg"
+    image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+
+    messages = [
+        {
+            "role": "system",
+            "content": "Respond only in rhyming couplets."
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Should I eat this?"},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{image_data}"
+                    }
+                }
+            ]
+        }
+    ]
+
+    print("\nMultimodal Response:")
+    for chunk in client.stream_completion(messages, model):
+        print(chunk, end='', flush=True)
+    print()
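The message-processing loop in this client is essentially a format bridge: the rest of the app builds Anthropic-style image blocks, while LM Studio's OpenAI-compatible endpoint expects image_url entries carrying a data URL. A standalone sketch of just that mapping (placeholder base64 string):

```python
# Anthropic-style block, as built elsewhere in the app.
anthropic_block = {
    "type": "image",
    "source": {
        "type": "base64",
        "media_type": "image/jpeg",
        "data": "<base64-jpeg-data>",  # placeholder
    },
}

# Equivalent OpenAI-style block, as produced by stream_completion above.
source = anthropic_block["source"]
openai_block = {
    "type": "image_url",
    "image_url": {"url": f"data:{source['media_type']};base64,{source['data']}"},
}

print(openai_block["image_url"]["url"].split(",")[0])  # data:image/jpeg;base64
```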
