diff --git a/docs.json b/docs.json index 2f1b9f70..75631196 100644 --- a/docs.json +++ b/docs.json @@ -157,6 +157,7 @@ "server/services/llm/google-vertex", "server/services/llm/grok", "server/services/llm/groq", + "server/services/llm/nebius", "server/services/llm/nim", "server/services/llm/ollama", "server/services/llm/openai", diff --git a/server/services/llm/nebius.mdx b/server/services/llm/nebius.mdx new file mode 100644 index 00000000..5de9c6bd --- /dev/null +++ b/server/services/llm/nebius.mdx @@ -0,0 +1,333 @@ +--- +title: "Nebius" +description: "LLM service implementation using Nebius AI Studio's API" +--- + +## Overview + +`NebiusLLMService` provides access to Nebius AI Studio's language models, supporting streaming responses, function calling, and context management with high-performance inference capabilities and flexible model selection. + + + + Pipecat's API methods for Nebius integration + + + Complete example with function calling + + + Official Nebius AI Studio documentation + + + Access models and manage API keys + + + +## Installation + +To use Nebius services, install the required dependencies: + +```bash +pip install "pipecat-ai[nebius]" +``` + +## Prerequisites + +### Nebius Account Setup + +Before using Nebius LLM services, you need: + +1. **Nebius Account**: Sign up at [Nebius AI Studio](https://studio.nebius.com/) +2. **API Key**: Generate an API key from your studio dashboard +3. **Model Selection**: Choose from available Nebius models (Qwen, Llama, etc.) +4. 
**Resource Allocation**: Configure compute resources as needed + +### Required Environment Variables + +- `NEBIUS_API_KEY`: Your Nebius API key for authentication + +## Configuration + +### Basic Setup + +```python +from pipecat.services.nebius import NebiusLLMService + +# Initialize the Nebius LLM service +llm = NebiusLLMService( + api_key="your-nebius-api-key", + model="Qwen/Qwen3-30B-A3B-fast", + params={ + "temperature": 0.7, + "max_tokens": 1000, + "top_p": 0.95, + "frequency_penalty": 0.0, + "presence_penalty": 0.0 + } +) +``` + +### Available Models + +Nebius AI Studio provides access to various language models: + +- **Qwen Models**: `Qwen/Qwen3-30B-A3B-fast`, `Qwen/Qwen3-70B-Instruct` +- **Llama Models**: `meta-llama/Llama-3-8b-chat-hf`, `meta-llama/Llama-3-70b-chat-hf` +- **Custom Models**: Contact Nebius for enterprise model deployment + +### Advanced Configuration + +```python +from pipecat.services.nebius import NebiusLLMService + +llm = NebiusLLMService( + api_key="your-nebius-api-key", + base_url="https://api.studio.nebius.com/v1/", # Custom endpoint if needed + model="Qwen/Qwen3-30B-A3B-fast", + params={ + "temperature": 0.6, + "max_tokens": 2048, + "top_p": 0.9, + "frequency_penalty": 0.1, + "presence_penalty": 0.1, + "stream": True # Enable streaming responses + } +) +``` + +## Usage Examples + +### Basic Chat Completion + +```python +import asyncio +from pipecat.services.nebius import NebiusLLMService +from pipecat.frames.frames import TextFrame + +async def main(): + llm = NebiusLLMService( + api_key="your-nebius-api-key", + model="Qwen/Qwen3-30B-A3B-fast" + ) + + # Process a text frame + response = await llm.process_frame( + TextFrame("What are the key benefits of using Nebius AI Studio?") + ) + print(response) + +asyncio.run(main()) +``` + +### Function Calling + +```python +from pipecat.services.nebius import NebiusLLMService + +# Define functions for the LLM to use +functions = [ + { + "name": "get_weather", + "description": "Get current 
weather information",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "location": {"type": "string", "description": "City name"}
+            },
+            "required": ["location"]
+        }
+    }
+]
+
+llm = NebiusLLMService(
+    api_key="your-nebius-api-key",
+    model="Qwen/Qwen3-30B-A3B-fast",
+    params={"functions": functions}
+)
+```
+
+### Streaming Responses
+
+```python
+async def stream_response():
+    llm = NebiusLLMService(
+        api_key="your-nebius-api-key",
+        model="Qwen/Qwen3-30B-A3B-fast",
+        params={"stream": True}
+    )
+
+    async for chunk in llm.run_llm("Tell me about AI applications"):
+        if chunk:
+            print(chunk, end="", flush=True)
+```
+
+## Pipeline Integration
+
+### Complete Voice Assistant Example
+
+```python
+import asyncio
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.services.nebius import NebiusLLMService
+from pipecat.services.deepgram import DeepgramSTTService
+from pipecat.services.cartesia import CartesiaTTSService
+from pipecat.transports.daily import DailyTransport
+
+async def run_bot():
+    # Initialize services
+    stt = DeepgramSTTService(api_key="your-deepgram-key")
+    llm = NebiusLLMService(
+        api_key="your-nebius-api-key",
+        model="Qwen/Qwen3-30B-A3B-fast"
+    )
+    tts = CartesiaTTSService(api_key="your-cartesia-key")
+    transport = DailyTransport(
+        room_url="your-daily-room-url",
+        token="your-daily-token"
+    )
+
+    # Create pipeline
+    pipeline = Pipeline([stt, llm, tts])
+
+    # Run the bot
+    runner = PipelineRunner()
+    await runner.run(pipeline, transport)
+
+asyncio.run(run_bot())
+```
+
+## Authentication
+
+### API Key Authentication
+
+```python
+# Set the environment variable (run in your shell):
+#   export NEBIUS_API_KEY="your-nebius-api-key"
+
+# Or pass directly in code
+llm = NebiusLLMService(api_key="your-nebius-api-key")
+```
+
+### Custom Headers
+
+```python
+llm = NebiusLLMService(
+    api_key="your-nebius-api-key",
+    model="Qwen/Qwen3-30B-A3B-fast",
+    headers={"Custom-Header": "value"}
+)
+```
+
+## Error 
Handling + +```python +from pipecat.services.nebius import NebiusLLMService, NebiusException + +try: + llm = NebiusLLMService( + api_key="your-nebius-api-key", + model="Qwen/Qwen3-30B-A3B-fast" + ) + + response = await llm.process_frame(TextFrame("Hello")) + +except NebiusException as e: + print(f"Nebius API error: {e}") +except Exception as e: + print(f"General error: {e}") +``` + +## Performance Optimization + +### Batching Requests + +```python +# Enable request batching for better performance +llm = NebiusLLMService( + api_key="your-nebius-api-key", + model="Qwen/Qwen3-30B-A3B-fast", + params={ + "batch_size": 10, + "batch_timeout": 0.1 + } +) +``` + +### Connection Pooling + +```python +# Configure connection pooling +llm = NebiusLLMService( + api_key="your-nebius-api-key", + model="Qwen/Qwen3-30B-A3B-fast", + max_connections=20, + timeout=30.0 +) +``` + +## Troubleshooting + +### Common Issues + +1. **API Key Issues** + - Ensure your API key is valid and has sufficient credits + - Check that the API key has access to the selected model + +2. **Model Availability** + - Verify the model name is correct and available in your region + - Some models may require special access permissions + +3. **Rate Limiting** + - Implement exponential backoff for rate-limited requests + - Monitor your usage in the Nebius AI Studio dashboard + +4. **Streaming Issues** + - Ensure your network supports persistent connections + - Check firewall settings for WebSocket/SSE connections + +### Debug Mode + +```python +import logging + +# Enable debug logging +logging.basicConfig(level=logging.DEBUG) + +llm = NebiusLLMService( + api_key="your-nebius-api-key", + model="Qwen/Qwen3-30B-A3B-fast", + debug=True +) +``` + +## Best Practices + +1. **Model Selection**: Choose the appropriate model size based on your latency and quality requirements +2. **Parameter Tuning**: Adjust temperature and top_p values based on your use case +3. 
**Error Handling**: Implement robust error handling for production applications +4. **Monitoring**: Monitor API usage and response times in production +5. **Security**: Never expose API keys in client-side code or public repositories + +## Support + +For additional support with Nebius integration: + +- [Nebius AI Studio Documentation](https://docs.studio.nebius.com/) +- [Nebius Support Portal](https://support.nebius.com/) +- [Pipecat Community Discord](https://discord.gg/pipecat)