# Spaces: Running
import os
from huggingface_hub import InferenceClient
import gradio as gr
# Read the Hugging Face access token from the HF_TOKEN environment variable.
# The value is None when the variable is not set.
hf_token = os.environ.get("HF_TOKEN")
# Selectable models: display name shown in the UI -> Hub repo ID passed to
# the inference client. Insertion order is the order of the dropdown entries.
# NOTE(review): some repo IDs are written with lowercase name parts
# (e.g. "qwen/qwen2.5-coder-32B-instruct") — confirm they match the Hub listing.
models = {
    "Llama-3.3-70B-Instruct": "meta-llama/llama-3.3-70B-instruct",
    "QwQ-32B-Preview": "Qwen/QwQ-32B-Preview",
    "Qwen2.5-Coder-32B-Instruct": "qwen/qwen2.5-coder-32B-instruct",
    "Mistral-Nemo-Instruct-2407": "mistralai/Mistral-Nemo-Instruct-2407",
    "microsoft/phi-4": "microsoft/phi-4",
    "Hermes-3-Llama-3.2-3B": "NousResearch/Hermes-3-Llama-3.2-3B",
    "Phi-3-mini-4k-instruct": "microsoft/phi-3-mini-4k-instruct",
}
# Build an inference client for whichever model the user picked
def get_inference_client(selected_model):
    """Return an ``InferenceClient`` for the repo mapped to *selected_model*.

    ``selected_model`` is looked up in the module-level ``models`` dict, so a
    name that is not one of its keys raises ``KeyError``. The client is
    created with the module-level ``hf_token``.
    """
    repo_id = models[selected_model]
    return InferenceClient(repo_id, token=hf_token)
# Function to get a response from the chatbot
def get_response(user_input, history, selected_model, system_prompt, temperature, max_tokens, top_p):
    """Send the conversation to the selected model and record its reply.

    Args:
        user_input: Text of the new user message.
        history: Earlier turns as a list of dicts with "role" and "content"
            keys. ``None`` is treated as an empty conversation.
        selected_model: Model name handed to ``get_inference_client``.
        system_prompt: Optional system instruction; omitted from the request
            when empty or ``None``.
        temperature: Sampling temperature forwarded to the completion call.
        max_tokens: Maximum number of tokens to generate; coerced to ``int``
            before being forwarded.
        top_p: Top-p value forwarded to the completion call.

    Returns:
        The history list with the new user message and the assistant reply
        appended. When a list was passed in, that same list is extended in
        place and returned. If ``user_input`` is empty or whitespace-only,
        the history is returned unchanged and no request is made.
    """
    if history is None:
        history = []

    # A blank message gives the model nothing to answer; skip the request
    # instead of sending an empty user turn.
    if not user_input or not user_input.strip():
        return history

    client = get_inference_client(selected_model)

    # Request payload: optional system message, prior turns, then the new input.
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    # Copy only role/content so any extra keys on history entries are not sent.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": user_input})

    # Get response from the model
    response = client.chat_completion(
        messages,
        max_tokens=int(max_tokens),  # UI sliders may deliver a float
        temperature=temperature,
        top_p=top_p,
    )
    bot_response = response.choices[0].message.content

    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": bot_response})
    return history
# Gradio interface: chat panel on the left, collapsible settings on the right
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=2):
            # type="messages" is set to avoid the deprecation warning
            chatbot = gr.Chatbot(type="messages")
            with gr.Row():
                user_input = gr.Textbox(
                    show_label=False,
                    placeholder="Enter your message...",
                )
                send_button = gr.Button("Send")
        with gr.Column(scale=1):
            with gr.Accordion("Settings", open=False):
                # Model selection
                selected_model = gr.Dropdown(
                    choices=list(models),
                    label="Select Model",
                    value="Llama-3.3-70B-Instruct",
                )
                # Chat settings
                system_prompt = gr.Textbox(
                    value="You are a friendly and open-minded chatbot.",
                    label="System Prompt (Optional)",
                    lines=5,
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.7,
                    label="Temperature",
                )
                max_tokens = gr.Slider(
                    minimum=10,
                    maximum=8192,
                    step=10,
                    value=250,
                    label="Max Tokens",
                )
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.9,
                    label="Top-p",
                )

    # Chatbot interaction
    def submit_message(user_input, history, selected_model, system_prompt, temperature, max_tokens, top_p):
        """Run one chat turn; return an empty textbox value and the new history."""
        updated_history = get_response(
            user_input,
            history,
            selected_model,
            system_prompt,
            temperature,
            max_tokens,
            top_p,
        )
        # The empty string clears the message box after sending.
        return "", updated_history

    # Both triggers below use the same handler, inputs and outputs.
    chat_inputs = [
        user_input,
        chatbot,
        selected_model,
        system_prompt,
        temperature,
        max_tokens,
        top_p,
    ]
    chat_outputs = [user_input, chatbot]

    # Clicking the Send button submits the message
    send_button.click(fn=submit_message, inputs=chat_inputs, outputs=chat_outputs)
    # Pressing Enter in the textbox submits the message as well
    user_input.submit(fn=submit_message, inputs=chat_inputs, outputs=chat_outputs)

# Launch the Gradio interface
demo.launch()