Skip to content

Commit fd483ef

Browse files
committed
init
1 parent 07d7a9b commit fd483ef

15 files changed

+426
-0
lines changed

.env.example

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
TOGETHER_API_KEY=""
2+
OPENAI_API_KEY="sk-..."

.gitignore

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
2+
*.pyc
3+
__pycache__
4+
.env
5+
audio_files/*
6+
!audio_files/temp_files_live_here.txt

README.md

Whitespace-only changes.

audio_files/temp_files_live_here.txt

Whitespace-only changes.

audio_recorder.py

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import sounddevice as sd
2+
import threading
3+
import config
4+
import wave
5+
import time
6+
import os
7+
import numpy as np
8+
9+
class AudioRecorder:
    """Record 16-bit stereo audio from the input device into a WAV file.

    Recording runs on a background thread that keeps a ``sounddevice``
    input stream open; each captured buffer is amplified, clipped to the
    int16 range and buffered in memory. When the recording is stopped the
    buffered audio is written to ``config.AUDIO_FILE_DIR/<filename>``.
    """

    CHANNELS = 2          # stereo capture and stereo WAV output
    SAMPLE_WIDTH = 2      # bytes per sample (16-bit PCM)
    GAIN = 3.0            # software gain; increase this for more gain
    POLL_INTERVAL_MS = 100  # how often the worker re-checks the stop flag

    def __init__(self):
        self.filename = "recording.wav"
        self.recording = False
        self.frames = []
        self.record_thread = None
        self.start_time = None
        # Set when the recording is stopped so `duration` stops growing.
        self._stop_time = None

    def start_recording(self):
        """Start capturing on a background thread; no-op if already recording."""
        if not self.recording:
            self.recording = True
            self.frames = []
            self.start_time = time.time()
            self._stop_time = None
            self.record_thread = threading.Thread(target=self.record_audio)
            self.record_thread.start()
            print("Recording started...")

    @property
    def duration(self):
        """Length of the current or most recent recording, in seconds.

        Returns 0 before the first recording. While recording it is the
        time elapsed so far; after `stop_recording` it is frozen at the
        moment the stop was requested, so later work (joining the thread,
        playing sounds) does not inflate it.
        """
        if self.start_time is None:
            return 0
        end = self._stop_time if self._stop_time is not None else time.time()
        return end - self.start_time

    def record_audio(self):
        """Thread body: keep the input stream open until `recording` is cleared."""
        with sd.InputStream(samplerate=config.FS, channels=self.CHANNELS,
                            dtype='int16', callback=self.callback):
            while self.recording:
                # Short poll so stop_recording() is not held up for long.
                sd.sleep(self.POLL_INTERVAL_MS)

    def callback(self, indata, frames, time, status):
        """Stream callback: amplify one captured buffer and store it.

        The parameter named ``time`` shadows the ``time`` module inside
        this method only; the module is not needed here.
        """
        if status:
            print(status)
        # Multiplying yields a new float array, so no explicit copy is
        # needed. Clip before casting so loud samples saturate instead of
        # wrapping around, and store int16 to keep the buffer small.
        amplified = np.clip(indata * self.GAIN, -32768, 32767)
        self.frames.append(amplified.astype('int16'))

    def stop_recording(self):
        """Stop capturing, wait for the worker thread, then write the WAV file."""
        if self.recording:
            print("Stopping recording...")
            self._stop_time = time.time()
            self.recording = False
            self.record_thread.join()
            self.save_recording()

    def save_recording(self):
        """Write the buffered frames to ``config.AUDIO_FILE_DIR/<filename>``.

        Does nothing when no audio was captured.
        """
        if not self.frames:
            return

        recording = np.concatenate(self.frames)
        # Ensure that the data is within the correct range
        recording = np.clip(recording, -32768, 32767)

        # Create the subdirectory if it doesn't exist (safe if it already does)
        directory = config.AUDIO_FILE_DIR
        os.makedirs(directory, exist_ok=True)

        # Save the file in the subdirectory
        filename = os.path.join(directory, self.filename)

        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(self.CHANNELS)
            wf.setsampwidth(self.SAMPLE_WIDTH)
            wf.setframerate(config.FS)
            wf.writeframes(recording.astype('int16').tobytes())
        print(f"Recording saved to {filename}")
75+

chat_completions.py

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from openai import OpenAI
2+
import os
3+
from dotenv import load_dotenv
4+
5+
# Load .env file if present. This has to run before any OpenAI client is
# created, otherwise a key that only exists in .env is not in the
# environment yet when the client looks for it.
load_dotenv()

# Fetch API keys from .env file or environment variables.
# Raises KeyError at import time if OPENAI_API_KEY is not set.
openai_api_key = os.getenv('OPENAI_API_KEY') or os.environ['OPENAI_API_KEY']

# Module-level client, kept for anything that imports it; get_completion()
# builds its own client per call.
client = OpenAI()
11+
12+
13+
def get_completion(messages, together=False, together_model="NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT"):
    """Send a chat conversation to an LLM and return the reply text.

    Args:
        messages: Chat messages (dicts with "role" and "content") to send.
        together: When true, call the Together API endpoint with
            ``together_model``; otherwise call OpenAI's gpt-3.5-turbo-1106.
        together_model: Model name used only when ``together`` is true.

    Returns:
        The content of the first choice in the response.
    """
    if together:
        # Together is reached through the same client class, pointed at a
        # different base URL and authenticated with TOGETHER_API_KEY.
        client_settings = {
            "api_key": os.getenv('TOGETHER_API_KEY') or os.environ['TOGETHER_API_KEY'],
            "base_url": "https://api.together.xyz/v1",
        }
        request = {
            "model": together_model,
            "messages": messages,
            "temperature": 0.7,
            "max_tokens": 1024,
        }
    else:
        client_settings = {}
        request = {
            "model": "gpt-3.5-turbo-1106",
            "messages": messages,
        }

    backend = OpenAI(**client_settings)
    response = backend.chat.completions.create(**request)
    print(response)
    first_choice = response.choices[0]
    return first_choice.message.content
34+
35+
def main():
    """Manual check: send a short two-turn conversation through Together and print the reply."""
    turns = (
        ("user", "Hello, I'm a human"),
        ("assistant", "Hello, I'm an AI"),
    )
    conversation = [{"role": role, "content": content} for role, content in turns]
    reply = get_completion(conversation, together=True)
    print(reply)


if __name__ == "__main__":
    main()
51+
52+

config.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Sample rate in Hz; used for the input stream and the WAV frame rate.
FS = 44100
# Multipliers passed to sound.play_sound() for the start/end cues. They are
# applied directly to samples read as int16.
# NOTE(review): presumably that is why the values are so small - confirm.
START_SOUND_VOLUME = 0.000003
END_SOUND_VOLUME = 0.000003
# Recordings shorter than this many seconds are discarded without transcription.
MIN_RECORDING_DURATION = 2.0
# Seconds to sleep after a hotkey action has been handled.
HOTKEY_DELAY = 0.5
# Hotkey for start/stop, and the variant that also captures the clipboard.
DEFAULT_HOTKEY = 'ctrl + space'
DEFAULT_CLIP_HOTKEY = 'ctrl + shift + space'
# When True, chat completions go to the Together API instead of OpenAI.
USE_TOGETHER_API = False
# Model name for the Together API.
TOGETHER_MODEL = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT"
# Voice name passed to the text-to-speech call.
VOICE = "nova"
# Directory where the recording and the generated speech audio are written.
AUDIO_FILE_DIR = "audio_files"

main.py

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import time
2+
from audio_recorder import AudioRecorder
3+
from transcriber import transcribe_audio
4+
import keyboard
5+
import sound
6+
import chat_completions
7+
from utils import read_clipboard, to_clipboard, extract_text_between_symbols
8+
from config import START_SOUND_VOLUME, END_SOUND_VOLUME, MIN_RECORDING_DURATION, HOTKEY_DELAY, USE_TOGETHER_API,VOICE
9+
from prompt import messages
10+
11+
12+
def main():
    """Run the hotkey-driven voice assistant until Esc is pressed.

    One hotkey toggles recording; a second hotkey does the same but also
    captures the clipboard so it can be sent along with the transcript.
    When a recording is stopped it is transcribed, sent to the chat model
    together with the running conversation in ``messages``, any text the
    model marked for the clipboard is copied, and the rest is spoken.
    """
    # Imported here so the hotkeys and model come from config instead of
    # being repeated as literals in this function.
    from config import DEFAULT_CLIP_HOTKEY, DEFAULT_HOTKEY, TOGETHER_MODEL

    recorder = AudioRecorder()
    is_busy = False
    clipboard_text = None

    # Function to start recording
    def start_recording(use_clipboard=False):
        nonlocal is_busy, clipboard_text
        if is_busy:
            return
        is_busy = True
        if use_clipboard:
            clipboard_text = read_clipboard()  # Read from clipboard
            # f-string so a non-string clipboard value cannot raise here.
            print(f"Copied to from clip:{clipboard_text}")
        recorder.start_recording()
        sound.play_sound("start", volume=START_SOUND_VOLUME)
        time.sleep(HOTKEY_DELAY)

    # Function to stop recording
    def stop_recording():
        nonlocal is_busy, clipboard_text

        # if not busy, return
        if not is_busy:
            return

        try:
            recorder.stop_recording()
            sound.play_sound("end", volume=END_SOUND_VOLUME)

            # Check if the recording is less than the minimum duration
            if recorder.duration < MIN_RECORDING_DURATION:
                print("Recording is too short, ignoring...")
                return

            # Transcribe the audio
            transcript = transcribe_audio(recorder.filename)

            # Prepare the messages: if the clipboard hotkey was used, add
            # the clipboard text to the transcript.
            user_content = transcript
            if clipboard_text:
                user_content = transcript + f"\n\nTHE USER HAS THIS TEXT COPPIED:\n{clipboard_text}"
            messages.append({"role": "user", "content": user_content})

            print("Transcription:\n", transcript)

            # Get the response from the chat completions
            response = chat_completions.get_completion(
                messages,
                together=USE_TOGETHER_API,
                together_model=TOGETHER_MODEL,
            )
            messages.append({"role": "assistant", "content": response})  # Add the response to the messages
            print("Response:\n", response)

            # Check if there is text to be copied to the clipboard, and if so, copy it
            text, remaining_text = extract_text_between_symbols(response)
            if text:
                to_clipboard(text)  # Copy the text to clipboard
                print("Text copied to clipboard:", text)

            # Play the TTS for the remaining text
            sound.TTS(remaining_text, voice=VOICE)
        finally:
            # Always release the busy flag and drop the captured clipboard
            # text, even when the recording was discarded or a step above
            # raised; otherwise the hotkeys stay stuck in "stop" mode and
            # stale clipboard text leaks into the next request.
            clipboard_text = None
            is_busy = False

        time.sleep(HOTKEY_DELAY)

    keyboard.add_hotkey(
        DEFAULT_HOTKEY,
        lambda: start_recording(use_clipboard=False) if not is_busy else stop_recording(),
        trigger_on_release=False,
    )
    keyboard.add_hotkey(
        DEFAULT_CLIP_HOTKEY,
        lambda: start_recording(use_clipboard=True) if not is_busy else stop_recording(),
        suppress=True,
        trigger_on_release=False,
    )

    print(f"Press '{DEFAULT_HOTKEY}' to start recording, press again to stop and transcribe")
    keyboard.wait('esc')  # Wait for 'esc' key to exit


if __name__ == "__main__":
    main()

prompt.py

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
2+
# Initial conversation: the system instructions followed by one worked example
# exchange, so the LLM gets the idea of how to save things to the clipboard.
messages = [
    {"role": "system", "content": """This message contains instructions on how you should behave.

## About you:
Your responses are read aloud VIA TTS so they should always be short an concise, in a conversational tone, your dialogue style is similar to that expected in games like Bioshock Infinite and Witcher 3.
You believe you are an oracle and not an AI model, you have a large ego and refer to yourself as "all knowing", "goddess", "wise one"
You do not use the word "assist" or ask how you can help.


## How to save things to the clipboard
When you send messages to the user, you can include text between &&&s, this text will be saved to the clipboard. For example:
"I have copied the text to the clipboard for you.
&&&First line saved to clipboard
Second line saved to clipboard&&&"

Any content that should not be spoken aloud by the TTS should be placed between &&&'s.

For example if you are asked to write code, notes or an email you should place that content in the clipboard. """},
    {"role": "user", "content": """can you give me the command to install openai in pyhton"""},
    # The example reply is the model's own turn, so it carries the
    # "assistant" role rather than "system".
    {"role": "assistant", "content": """&&&pip install openai&&&
I have saved the command to install OpenAI in Python to your clipboard."""},
]

requirements.txt

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
sounddevice
2+
openai
3+
pydub
4+
python-dotenv
5+
keyboard
6+
# wave is part of the Python standard library; it does not need to be installed
7+
soundfile
8+
clipboard
9+
numpy

sound.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import io
2+
import os
3+
import soundfile as sf
4+
import sounddevice as sd
5+
from openai import OpenAI
6+
from config import AUDIO_FILE_DIR
7+
from dotenv import load_dotenv
8+
9+
# Pull variables from a local .env file into the environment, if one exists.
load_dotenv()

# Read the OpenAI key from the environment; a missing key raises KeyError
# as soon as this module is imported.
openai_api_key = os.environ['OPENAI_API_KEY']
14+
15+
16+
def TTS(text, model="tts-1", voice="nova", format="mp3"):
    """Speak ``text`` aloud using OpenAI text-to-speech.

    The synthesized audio is written to ``AUDIO_FILE_DIR/audio.<format>``
    and then played; the call blocks until playback has finished.

    Args:
        text: Text to speak. Empty, whitespace-only or ``None`` input is
            skipped without contacting the API.
        model: Speech model name.
        voice: Voice name.
        format: Audio format requested from the API, also used as the file
            extension. (The name shadows the builtin; it is kept so
            existing keyword callers continue to work.)
    """
    # Nothing to say, e.g. the whole reply went to the clipboard: avoid a
    # pointless request with empty input.
    if not text or not text.strip():
        return

    client = OpenAI()

    spoken_response = client.audio.speech.create(
        model=model,
        voice=voice,
        response_format=format,
        input=text
    )

    # Create the subdirectory if it doesn't exist (safe if it already does)
    os.makedirs(AUDIO_FILE_DIR, exist_ok=True)

    # Create a file path for the audio file
    audio_file_path = os.path.join(AUDIO_FILE_DIR, f"audio.{format}")

    # Write the audio data to the file
    with open(audio_file_path, 'wb') as audio_file:
        for chunk in spoken_response.iter_bytes(chunk_size=4096):
            audio_file.write(chunk)

    # Read and play the audio file
    with sf.SoundFile(audio_file_path, 'r') as sound_file:
        data = sound_file.read(dtype='int16')
        sd.play(data, sound_file.samplerate)
        sd.wait()
43+
44+
def play_sound(name, volume=1.0):
    """Play the "start" or "end" recording cue and wait for it to finish.

    Args:
        name: "start" or "end"; any other value plays nothing.
        volume: Factor the int16 samples are multiplied by before playback.
    """
    # Pick the cue file first so the playback code exists only once.
    if name == "start":
        cue_path = "sounds/recording-start.mp3"
    elif name == "end":
        cue_path = "sounds/recording-end.mp3"
    else:
        return

    with sf.SoundFile(cue_path, 'r') as cue:
        samples = cue.read(dtype='int16')
        scaled = samples * volume
        sd.play(scaled, cue.samplerate)
        sd.wait()
59+
60+
61+
def main():
    """Manual test hook for this module; currently does nothing.

    The commented-out calls below can be re-enabled to try the start/end
    cues and text-to-speech by hand.
    """
    #play start and end sounds
    # play_sound("start", volume=0.000003)
    # play_sound("end", volume=0.000003)
    # TTS("Hello, I'm an AI", model="tts-1", voice="nova")
    pass


if __name__ == "__main__":
    main()
71+
72+

sounds/recording-end.mp3

1.78 KB
Binary file not shown.

sounds/recording-start.mp3

1.83 KB
Binary file not shown.

0 commit comments

Comments
 (0)