
Commit 2d58d0e

Reapply "Merge pull request #93 from NimbleAINinja/main"
This reverts commit 0e35a1f.

10 files changed (+531 −67 lines)


actions/always_reddy_voice_assistant/main.py

+35 −11
@@ -41,28 +41,54 @@ def handle_default_assistant_response(self):
 
             if len(self.messages) > 0 and self.messages[0]["role"] == "system":
                 self.messages[0]["content"] = prompt.get_system_prompt_message(config.ACTIVE_PROMPT)
-
             if self.last_message_was_cut_off:
                 message = "--> USER CUT THE ASSISTANTS LAST MESSAGE SHORT <--\n" + message
 
-            if self.AR.clipboard_text and self.AR.clipboard_text != self.AR.last_clipboard_text:
-                message += f"\n\nTHE USER HAS GANTED YOU ACCESS TO THEIR CLIPABORD, THIS IS ITS CONTENT (ignore if user doesn't mention it):\n```{self.AR.clipboard_text}```"
+            new_message = {"role": "user", "content": message}
+
+            if hasattr(self.AR, 'clipboard_image') and self.AR.clipboard_image:
+                new_message['content'] = [
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/jpeg",
+                            "data": self.AR.clipboard_image.replace('\n', '')
+                        }
+                    },
+                    {
+                        "type": "text",
+                        "text": message + "\n\nTHE USER HAS GRANTED YOU ACCESS TO AN IMAGE FROM THEIR CLIPBOARD. ANALYZE AND BRIEFLY DESCRIBE THE IMAGE IF RELEVANT TO THE CONVERSATION."
+                    }
+                ]
+                self.AR.clipboard_image = None
+            elif self.AR.clipboard_text and self.AR.clipboard_text != self.AR.last_clipboard_text:
+                new_message['content'] += f"\n\nTHE USER HAS GRANTED YOU ACCESS TO THEIR CLIPBOARD, THIS IS ITS CONTENT (ignore if user doesn't mention it):\n```{self.AR.clipboard_text}```"
                 self.AR.last_clipboard_text = self.AR.clipboard_text
                 self.AR.clipboard_text = None
 
             if config.TIMESTAMP_MESSAGES:
-                message += f"\n\nMESSAGE TIMESTAMP:{time.strftime('%I:%M %p')} {time.strftime('%Y-%m-%d (%A)')} "
+                timestamp = f"\n\nMESSAGE TIMESTAMP:{time.strftime('%I:%M %p')} {time.strftime('%Y-%m-%d (%A)')} "
+                if isinstance(new_message['content'], list):
+                    new_message['content'][-1]['text'] += timestamp
+                else:
+                    new_message['content'] += timestamp
 
-            self.messages.append({"role": "user", "content": message})
+            self.messages.append(new_message)
 
             if self.AR.stop_action:
                 return
 
+            # Ensure there's at least one message
+            if not self.messages:
+                print("Error: No messages to send to the API.")
+                return
+
             stream = self.AR.completion_client.get_completion_stream(self.messages, config.COMPLETION_MODEL, **config.COMPLETION_PARAMS)
             response = self.AR.completion_client.process_text_stream(stream,
                                                                      marker_tuples=[(config.CLIPBOARD_TEXT_START_SEQ, config.CLIPBOARD_TEXT_END_SEQ, to_clipboard)],
-                                                                     sentence_callback=self.AR.tts.run_tts)#We pass in pairs of start and end sequences to the marker_tuples argument to indicate that the text between these sequences should be copied to the clipboard, then we pass the to_clipboard function as the callback to handle this action.
-
+                                                                     sentence_callback=self.AR.tts.run_tts)
+
             while self.AR.tts.running_tts:
                 time.sleep(0.001)
 
@@ -84,16 +110,14 @@ def handle_default_assistant_response(self):
             print("\nResponse:\n", response)
 
         except Exception as e:
+            print(f"An error occurred in handle_default_assistant_response: {e}")
             if self.AR.verbose:
                 import traceback
                 traceback.print_exc()
-            else:
-                print(f"An error occurred while handling the response: {e}")
-
 
     def new_chat(self):
         """Clear the message history and start a new chat session."""
         self.messages = prompt.build_initial_messages(config.ACTIVE_PROMPT)
         self.last_message_was_cut_off = False
         self.AR.last_clipboard_text = None
-        print("New chat session started.")
+        print("New chat session started.")

image_supported_models.json

+22
@@ -0,0 +1,22 @@
+{
+    "supported_models": [
+        "claude-3-opus-20240229",
+        "claude-3-sonnet-20240229",
+        "claude-3-haiku-20240307",
+        "claude-3-5-sonnet-20240620",
+        "claude-3-5-sonnet-20241022",
+        "gpt-4o",
+        "gpt-4o-mini",
+        "gpt-4-turbo-2024-04-09",
+        "gpt-4",
+        "pixtral-12b",
+        "llava-v1.5-13b",
+        "llava-v1.5-7b",
+        "mini-cpm-2.6",
+        "bunny-llama-3-8b",
+        "deepseek-vl",
+        "idefics2-8b",
+        "llava-llama-3-8b",
+        "meta-llama/llama-3.2-11b-vision-instruct:free"
+    ]
+}
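This excerpt doesn't show where the new JSON file is consumed, so the following is only a plausible sketch of a reader; the helper name, file path, and gating logic are assumptions, not code from the commit:

```python
import json

def model_supports_images(model_name: str, path: str = "image_supported_models.json") -> bool:
    """Hypothetical helper: check a model against the supported_models list."""
    with open(path, "r", encoding="utf-8") as f:
        supported = json.load(f)["supported_models"]
    return model_name in supported

# Example: only attach a clipboard image for image-capable models.
if model_supports_images("gpt-4o"):
    print("Model accepts image content blocks.")
```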

llm_apis/anthropic_client.py

+110 −11
@@ -1,5 +1,8 @@
 from anthropic import Anthropic
+import anthropic.types
 import os
+import base64
+import httpx
 
 class AnthropicClient:
     def __init__(self, verbose=False):
@@ -13,7 +16,7 @@ def stream_completion(self, messages, model, **kwargs):
         Args:
             messages (list): List of messages.
             model (str): Model for completion.
-            **kwargs: Additional keyword arguments.
+            **kwargs: Additional keyword arguments, including max_tokens if specified.
 
         Yields:
             str: Text generated by the Anthropic API.
@@ -29,24 +32,120 @@ def stream_completion(self, messages, model, **kwargs):
             # Prepare the arguments for the Anthropic API call
             api_args = {
                 "model": model,
-                "messages": messages,
+                "max_tokens": kwargs.get('max_tokens', 1000), # Default to 1000 if not provided
                 **kwargs
             }
 
             # Only include the system parameter if a system message is present
             if system_message:
                 api_args["system"] = system_message
 
-            # Stream the completion
-            stream = self.client.messages.stream(**api_args)
-
-            with stream as stream:
-                for text in stream.text_stream:
-                    yield text
+            processed_messages = []
+            for message in messages:
+                if 'image' in message:
+                    processed_content = [
+                        {
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": "image/jpeg",
+                                "data": message['image'].replace('\n', '') # Remove newlines
+                            }
+                        }
+                    ]
+
+                    # Add original text content if present
+                    if 'content' in message and message['content']:
+                        processed_content.append({
+                            "type": "text",
+                            "text": message['content']
+                        })
+
+                    processed_messages.append({
+                        "role": message['role'],
+                        "content": processed_content
+                    })
+                else:
+                    processed_messages.append({
+                        "role": message['role'],
+                        "content": message['content']
+                    })
+
+            if not processed_messages:
+                raise ValueError(f"No messages to send to the API. Original messages: {messages}")
+
+            api_args["messages"] = processed_messages
+
+            with self.client.messages.stream(**api_args) as stream:
+                for event in stream:
+                    if isinstance(event, anthropic.types.MessageStartEvent):
+                        continue
+                    if isinstance(event, anthropic.types.ContentBlockStartEvent):
+                        continue
+                    if isinstance(event, anthropic.types.ContentBlockDeltaEvent):
+                        yield event.delta.text
         except Exception as e:
             if self.verbose:
                 import traceback
                 traceback.print_exc()
-            else:
-                print(f"An error occurred streaming completion from Anthropic API: {e}")
-            raise RuntimeError(f"An error occurred streaming completion from Anthropic API: {e}")
+            print(f"An error occurred streaming completion from Anthropic API: {e}")
+            raise RuntimeError(f"An error occurred streaming completion from Anthropic API: {e}")
+
+
+# Test the AnthropicClient
+if __name__ == "__main__":
+    client = AnthropicClient(verbose=True)
+
+    #test text only
+    messages = [
+        {
+            "role": "system",
+            "content": "Be precise and concise."
+        },
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ]
+    model = "claude-3-5-sonnet-20240620"
+
+    print("Response:")
+    for chunk in client.stream_completion(messages, model):
+        print(chunk, end='', flush=True)
+    print() # Add a newline at the end
+
+
+    #test multimodal
+    image_url = "https://upload.wikimedia.org/wikipedia/commons/a/a7/Camponotus_flavomarginatus_ant.jpg"
+    image_media_type = "image/jpeg"
+    image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+
+    messages = [
+        {
+            "role": "system",
+            "content": "Respond only in rhyming couplets."
+        },
+        {
+            "role": "user",
+            "content": "Should I eat this?"
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": image_media_type,
+                        "data": image_data,
+                    },
+                }
+            ],
+        }
+    ]
+
+    print("Response:")
+    for chunk in client.stream_completion(messages, model):
+        print(chunk, end='', flush=True)
+    print()
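One thing this hunk relies on but doesn't show is where system_message comes from: Anthropic's Messages API takes the system prompt as a top-level system parameter rather than as a "system" role inside messages, so it is presumably split out earlier in the method. A sketch of that assumption (variable names mirror the diff; the extraction code itself is not in this excerpt):

```python
messages = [
    {"role": "system", "content": "Be precise and concise."},
    {"role": "user", "content": "What is the capital of France?"},
]

# Assumed pre-processing: lift any system entry out of the message list so it
# can be passed as api_args["system"], as the hunk above does.
system_message = None
chat_messages = []
for m in messages:
    if m["role"] == "system":
        system_message = m["content"]  # last system message wins
    else:
        chat_messages.append(m)

print(system_message)  # Be precise and concise.
print(chat_messages)   # only the user message remains
```

Filtering the stream to ContentBlockDeltaEvent then yields only the text deltas, skipping the message-start and content-block-start bookkeeping events.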

llm_apis/lm_studio_client.py

+81 −12
@@ -1,4 +1,6 @@
 from openai import OpenAI
+import base64
+import httpx
 
 class LM_StudioClient:
     """Client for interacting with LM studio using a local server and openai lib."""
@@ -11,16 +13,42 @@ def stream_completion(self, messages, model, **kwargs):
 
         Args:
             messages (list): List of messages.
-            model (str): Model for completion, this for now is always "local-model"
+            model (str): Model for completion
             **kwargs: Additional keyword arguments.
 
         Yields:
             str: Text generated.
         """
         try:
+            # Process messages to handle multimodal content
+            processed_messages = []
+            for message in messages:
+                content = []
+
+                # Handle text content
+                if isinstance(message.get('content'), str):
+                    content.append({"type": "text", "text": message['content']})
+                # Handle multimodal content
+                elif isinstance(message.get('content'), list):
+                    for item in message['content']:
+                        if item.get('type') == 'image':
+                            content.append({
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:{item['source']['media_type']};base64,{item['source']['data']}"
+                                }
+                            })
+                        else:
+                            content.append(item)
+
+                processed_messages.append({
+                    "role": message['role'],
+                    "content": content if content else message.get('content')
+                })
+
             stream = self.client.chat.completions.create(
                 model=model,
-                messages=messages,
+                messages=processed_messages,
                 stream=True,
                 **kwargs
             )
@@ -36,13 +64,54 @@ def stream_completion(self, messages, model, **kwargs):
             print(f"An error occurred streaming completion from LM studio: {e}")
             raise RuntimeError(f"An error occurred streaming completion from LM studio: {e}")
 
-# # Example usage
-# if __name__ == "__main__":
-#     client = LM_StudioClient(base_url="http://localhost:1234/v1", verbose=True)
-#     messages = [
-#         {"role": "system", "content": "Always answer in rhymes."},
-#         {"role": "user", "content": "Introduce yourself."}
-#     ]
-#     model = "local-model"
-#     for content in client.stream_completion(messages, model):
-#         print(content)
+# Test the LMStudioClient
+if __name__ == "__main__":
+    client = LM_StudioClient(verbose=True)
+
+    #test text only
+    messages = [
+        {
+            "role": "system",
+            "content": "Be precise and concise."
+        },
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ]
+    model = "your_model_name_here" # Replace with your actual model name
+
+    print("\nText-only Response:")
+    for chunk in client.stream_completion(messages, model):
+        print(chunk, end='', flush=True)
+    print() # Add a newline at the end
+
+
+    #test multimodal
+    image_url = "https://upload.wikimedia.org/wikipedia/commons/a/a7/Camponotus_flavomarginatus_ant.jpg"
+    image_media_type = "image/jpeg"
+    image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+
+    messages = [
+        {
+            "role": "system",
+            "content": "Respond only in rhyming couplets."
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Should I eat this?"},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{image_data}"
+                    }
+                }
+            ]
+        }
+    ]
+
+    print("\nMultimodal Response:")
+    for chunk in client.stream_completion(messages, model):
+        print(chunk, end='', flush=True)
+    print()
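The message-processing loop in this client is essentially a format bridge: the rest of the app builds Anthropic-style image blocks, while LM Studio's OpenAI-compatible endpoint expects image_url entries carrying a data URL. A standalone sketch of just that mapping (placeholder base64 string):

```python
# Anthropic-style block, as built elsewhere in the app.
anthropic_block = {
    "type": "image",
    "source": {
        "type": "base64",
        "media_type": "image/jpeg",
        "data": "<base64-jpeg-data>",  # placeholder
    },
}

# Equivalent OpenAI-style block, as produced by stream_completion above.
source = anthropic_block["source"]
openai_block = {
    "type": "image_url",
    "image_url": {"url": f"data:{source['media_type']};base64,{source['data']}"},
}

print(openai_block["image_url"]["url"].split(",")[0])  # data:image/jpeg;base64
```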
