# syntax=docker/dockerfile:1
FROM python:3.11-slim

# Send logs straight to the container runtime and skip writing .pyc files.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /app

# Install dependencies first so this layer stays cached until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Unprivileged runtime user with a stable numeric UID/GID (verifiable by runAsNonRoot policies).
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --home-dir /app --no-create-home app

# Copy server code
COPY --chown=app:app server.py .

# Drop root only after every step that needs it has run.
USER app

# Documentation only: the port still has to be published at run time.
EXPOSE 8000

# Run the FastMCP server.
# --host 0.0.0.0 is required: a loopback bind is unreachable from outside the container,
# which would make the EXPOSEd port useless.
CMD ["fastmcp", "run", "server.py", "--transport", "streamable-http", "--host", "0.0.0.0", "--port", "8000"]
Go to [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) +2. Click "New token" +3. Give it a name and select permissions (read is sufficient for inference) +4. Copy the token (starts with `hf_`) +5. Store as `HUGGINGFACE_API_TOKEN` + +## Available Tools + +### Text Generation Tools + +#### `text_generation` +Generate text using large language models. + +**Parameters:** +- `prompt` (string, required): Input text prompt +- `model_id` (string, optional): Model ID (default: 'mistralai/Mistral-7B-Instruct-v0.3') +- `max_new_tokens` (int, optional): Maximum tokens to generate +- `temperature` (float, optional): Sampling temperature 0-2 (higher = more random) +- `top_p` (float, optional): Nucleus sampling 0-1 +- `top_k` (int, optional): Top-k sampling +- `repetition_penalty` (float, optional): Penalty for repetition +- `return_full_text` (bool, optional): Return prompt + generation (default: False) + +**Popular models:** +- `meta-llama/Llama-3.2-3B-Instruct` - Meta's Llama 3.2 +- `mistralai/Mistral-7B-Instruct-v0.3` - Mistral 7B +- `google/gemma-2-2b-it` - Google Gemma 2 +- `HuggingFaceH4/zephyr-7b-beta` - Zephyr 7B +- `tiiuae/falcon-7b-instruct` - Falcon 7B + +**Example:** +```python +result = await text_generation( + prompt="Write a Python function to calculate fibonacci numbers:", + model_id="mistralai/Mistral-7B-Instruct-v0.3", + max_new_tokens=200, + temperature=0.7, + top_p=0.9 +) +``` + +### Classification Tools + +#### `text_classification` +Classify text into categories (sentiment, topics, etc.). 
+ +**Parameters:** +- `text` (string, required): Text to classify +- `model_id` (string, optional): Model ID (default: 'distilbert-base-uncased-finetuned-sst-2-english') + +**Popular models:** +- `distilbert-base-uncased-finetuned-sst-2-english` - Sentiment (positive/negative) +- `facebook/bart-large-mnli` - Zero-shot classification +- `cardiffnlp/twitter-roberta-base-sentiment-latest` - Twitter sentiment +- `finiteautomata/bertweet-base-sentiment-analysis` - Tweet sentiment + +**Example:** +```python +result = await text_classification( + text="I love this product! It exceeded my expectations.", + model_id="distilbert-base-uncased-finetuned-sst-2-english" +) +# Returns: [{'label': 'POSITIVE', 'score': 0.9998}] +``` + +#### `token_classification` +Token-level classification for NER, POS tagging, etc. + +**Parameters:** +- `text` (string, required): Input text +- `model_id` (string, optional): Model ID (default: 'dslim/bert-base-NER') + +**Popular models:** +- `dslim/bert-base-NER` - Named Entity Recognition +- `Jean-Baptiste/roberta-large-ner-english` - Large NER model +- `dbmdz/bert-large-cased-finetuned-conll03-english` - CoNLL-2003 NER + +**Example:** +```python +result = await token_classification( + text="Apple Inc. is located in Cupertino, California.", + model_id="dslim/bert-base-NER" +) +# Returns entities: ORG (Apple Inc.), LOC (Cupertino), LOC (California) +``` + +### Question Answering & Text Processing + +#### `question_answering` +Answer questions based on provided context. 
+ +**Parameters:** +- `question` (string, required): Question to answer +- `context` (string, required): Context containing the answer +- `model_id` (string, optional): Model ID (default: 'deepset/roberta-base-squad2') + +**Popular models:** +- `deepset/roberta-base-squad2` - RoBERTa on SQuAD 2.0 +- `distilbert-base-cased-distilled-squad` - DistilBERT on SQuAD + +**Example:** +```python +result = await question_answering( + question="Where is the Eiffel Tower located?", + context="The Eiffel Tower is a landmark in Paris, France. It was built in 1889.", + model_id="deepset/roberta-base-squad2" +) +# Returns: {'answer': 'Paris, France', 'score': 0.98, 'start': 35, 'end': 48} +``` + +#### `summarization` +Summarize long text into shorter version. + +**Parameters:** +- `text` (string, required): Text to summarize +- `model_id` (string, optional): Model ID (default: 'facebook/bart-large-cnn') +- `max_length` (int, optional): Maximum summary length +- `min_length` (int, optional): Minimum summary length + +**Popular models:** +- `facebook/bart-large-cnn` - BART CNN summarization +- `google/pegasus-xsum` - PEGASUS XSum +- `sshleifer/distilbart-cnn-12-6` - Distilled BART + +**Example:** +```python +result = await summarization( + text="Long article text here...", + model_id="facebook/bart-large-cnn", + max_length=130, + min_length=30 +) +``` + +#### `translation` +Translate text between languages. 
+ +**Parameters:** +- `text` (string, required): Text to translate +- `model_id` (string, optional): Model ID for language pair (default: 'Helsinki-NLP/opus-mt-en-es') + +**Popular models:** +- `Helsinki-NLP/opus-mt-en-es` - English to Spanish +- `Helsinki-NLP/opus-mt-es-en` - Spanish to English +- `Helsinki-NLP/opus-mt-en-fr` - English to French +- `Helsinki-NLP/opus-mt-en-de` - English to German +- `facebook/mbart-large-50-many-to-many-mmt` - Multilingual (50 languages) + +**Example:** +```python +result = await translation( + text="Hello, how are you?", + model_id="Helsinki-NLP/opus-mt-en-es" +) +# Returns: [{'translation_text': 'Hola, ¿cómo estás?'}] +``` + +### Image Generation Tools + +#### `text_to_image` +Generate images from text prompts. + +**Parameters:** +- `prompt` (string, required): Text description of desired image +- `model_id` (string, optional): Model ID (default: 'black-forest-labs/FLUX.1-dev') +- `negative_prompt` (string, optional): What to avoid in image +- `num_inference_steps` (int, optional): Number of denoising steps +- `guidance_scale` (float, optional): How closely to follow prompt + +**Popular models:** +- `black-forest-labs/FLUX.1-dev` - FLUX.1 (high quality) +- `stabilityai/stable-diffusion-xl-base-1.0` - SDXL +- `stabilityai/stable-diffusion-2-1` - SD 2.1 +- `runwayml/stable-diffusion-v1-5` - SD 1.5 + +**Example:** +```python +result = await text_to_image( + prompt="A serene mountain landscape at sunset, photorealistic, 8k", + model_id="black-forest-labs/FLUX.1-dev", + negative_prompt="blurry, low quality, distorted", + guidance_scale=7.5 +) +# Returns: {'image': 'base64_encoded_image', 'format': 'base64', 'content_type': 'image/jpeg'} +``` + +### Computer Vision Tools + +#### `image_to_text` +Generate text descriptions from images (captioning). 
+ +**Parameters:** +- `image_base64` (string, required): Base64 encoded image +- `model_id` (string, optional): Model ID (default: 'Salesforce/blip-image-captioning-large') + +**Popular models:** +- `Salesforce/blip-image-captioning-large` - BLIP large +- `nlpconnect/vit-gpt2-image-captioning` - ViT-GPT2 + +**Example:** +```python +result = await image_to_text( + image_base64="base64_encoded_image_data", + model_id="Salesforce/blip-image-captioning-large" +) +# Returns: [{'generated_text': 'a dog playing in the park'}] +``` + +#### `image_classification` +Classify images into categories. + +**Parameters:** +- `image_base64` (string, required): Base64 encoded image +- `model_id` (string, optional): Model ID (default: 'google/vit-base-patch16-224') + +**Popular models:** +- `google/vit-base-patch16-224` - Vision Transformer +- `microsoft/resnet-50` - ResNet-50 + +**Example:** +```python +result = await image_classification( + image_base64="base64_encoded_image_data", + model_id="google/vit-base-patch16-224" +) +# Returns: [{'label': 'golden retriever', 'score': 0.95}, ...] +``` + +#### `object_detection` +Detect objects in images with bounding boxes. + +**Parameters:** +- `image_base64` (string, required): Base64 encoded image +- `model_id` (string, optional): Model ID (default: 'facebook/detr-resnet-50') + +**Popular models:** +- `facebook/detr-resnet-50` - DETR with ResNet-50 +- `hustvl/yolos-tiny` - YOLOS tiny + +**Example:** +```python +result = await object_detection( + image_base64="base64_encoded_image_data", + model_id="facebook/detr-resnet-50" +) +# Returns: [{'label': 'dog', 'score': 0.98, 'box': {...}}, ...] +``` + +### Audio Tools + +#### `text_to_speech` +Convert text to speech audio. 
+ +**Parameters:** +- `text` (string, required): Text to synthesize +- `model_id` (string, optional): Model ID (default: 'facebook/mms-tts-eng') + +**Popular models:** +- `facebook/mms-tts-eng` - MMS TTS English +- `espnet/kan-bayashi_ljspeech_vits` - VITS LJSpeech + +**Example:** +```python +result = await text_to_speech( + text="Hello, this is a test of text to speech.", + model_id="facebook/mms-tts-eng" +) +# Returns: {'audio': 'base64_encoded_audio', 'format': 'base64'} +``` + +#### `automatic_speech_recognition` +Transcribe audio to text (speech recognition). + +**Parameters:** +- `audio_base64` (string, required): Base64 encoded audio +- `model_id` (string, optional): Model ID (default: 'openai/whisper-large-v3') + +**Popular models:** +- `openai/whisper-large-v3` - Whisper large v3 (best quality) +- `openai/whisper-medium` - Whisper medium (faster) +- `facebook/wav2vec2-base-960h` - Wav2Vec 2.0 + +**Example:** +```python +result = await automatic_speech_recognition( + audio_base64="base64_encoded_audio_data", + model_id="openai/whisper-large-v3" +) +# Returns: {'text': 'transcribed audio text here'} +``` + +### Embedding & Similarity Tools + +#### `sentence_similarity` +Compute similarity between sentences. 
+ +**Parameters:** +- `source_sentence` (string, required): Reference sentence +- `sentences` (list, required): List of sentences to compare +- `model_id` (string, optional): Model ID (default: 'sentence-transformers/all-MiniLM-L6-v2') + +**Popular models:** +- `sentence-transformers/all-MiniLM-L6-v2` - Fast, good quality +- `sentence-transformers/all-mpnet-base-v2` - Best quality +- `BAAI/bge-small-en-v1.5` - BGE small + +**Example:** +```python +result = await sentence_similarity( + source_sentence="The cat sits on the mat", + sentences=[ + "A cat is sitting on a mat", + "The dog runs in the park", + "Cats are great pets" + ], + model_id="sentence-transformers/all-MiniLM-L6-v2" +) +# Returns: [0.95, 0.23, 0.65] +``` + +#### `feature_extraction` +Get embeddings (feature vectors) for text. + +**Parameters:** +- `text` (string, required): Input text +- `model_id` (string, optional): Model ID (default: 'sentence-transformers/all-MiniLM-L6-v2') + +**Popular models:** +- `sentence-transformers/all-MiniLM-L6-v2` - 384 dimensions +- `sentence-transformers/all-mpnet-base-v2` - 768 dimensions +- `BAAI/bge-small-en-v1.5` - 384 dimensions + +**Example:** +```python +result = await feature_extraction( + text="This is a sample sentence.", + model_id="sentence-transformers/all-MiniLM-L6-v2" +) +# Returns: [[0.012, -0.034, 0.056, ...]] (384-dimensional vector) +``` + +#### `fill_mask` +Fill in masked words in text. + +**Parameters:** +- `text` (string, required): Text with [MASK] token +- `model_id` (string, optional): Model ID (default: 'bert-base-uncased') + +**Popular models:** +- `bert-base-uncased` - BERT base +- `roberta-base` - RoBERTa base +- `distilbert-base-uncased` - DistilBERT + +**Example:** +```python +result = await fill_mask( + text="Paris is the [MASK] of France.", + model_id="bert-base-uncased" +) +# Returns: [{'token_str': 'capital', 'score': 0.95}, ...] 
+``` + +## Model Loading & Cold Starts + +**Important**: Models may take 20-60 seconds to load on first request (cold start). Subsequent requests are faster. + +**Tips:** +- Use popular models for faster loading +- Implement retry logic for timeouts +- Consider caching model responses +- Use smaller models for faster inference + +## Rate Limits + +### Free Tier +- Rate limited to prevent abuse +- Suitable for testing and small projects +- May experience queuing during high load + +### Pro Subscription ($9/month) +- No rate limits +- Priority access to models +- Faster inference +- No queuing + +Visit [huggingface.co/pricing](https://huggingface.co/pricing) for details. + +## Base64 Encoding + +For images and audio, you need to provide base64 encoded data: + +**Python example:** +```python +import base64 + +# Encode image +with open("image.jpg", "rb") as f: + image_base64 = base64.b64encode(f.read()).decode('utf-8') + +# Encode audio +with open("audio.wav", "rb") as f: + audio_base64 = base64.b64encode(f.read()).decode('utf-8') + +# Decode image response +image_bytes = base64.b64decode(response['image']) +with open("generated.jpg", "wb") as f: + f.write(image_bytes) +``` + +## Parameter Tuning + +### Text Generation +- **temperature** (0-2): Higher = more creative/random, Lower = more focused/deterministic +- **top_p** (0-1): Nucleus sampling, typically 0.9-0.95 +- **top_k**: Number of highest probability tokens to keep +- **repetition_penalty**: Penalize repeated tokens (>1.0 reduces repetition) + +### Image Generation +- **guidance_scale** (1-20): Higher = follows prompt more strictly (typical: 7-7.5) +- **num_inference_steps**: More steps = higher quality but slower (typical: 20-50) +- **negative_prompt**: Describe what you don't want in the image + +## Error Handling + +Common errors: + +- **503 Service Unavailable**: Model is loading (cold start), retry after 20-60 seconds +- **401 Unauthorized**: Invalid or missing API token +- **429 Too Many Requests**: Rate 
limit exceeded (upgrade to Pro) +- **400 Bad Request**: Invalid parameters or model ID +- **504 Gateway Timeout**: Model took too long to respond + +**Retry logic example:** +```python +import asyncio +import httpx + +max_retries = 3 +for attempt in range(max_retries): + try: + result = await text_generation(prompt="Hello") + break + except httpx.HTTPStatusError as e: + if e.response.status_code == 503 and attempt < max_retries - 1: + await asyncio.sleep(20) # Wait for model to load without blocking the event loop + continue + raise +``` + +## Finding Models + +**Browse models:** +- Visit [huggingface.co/models](https://huggingface.co/models) +- Filter by task (Text Generation, Image Generation, etc.) +- Sort by downloads, likes, or trending +- Check model card for usage examples + +**Popular categories:** +- Text Generation: 50,000+ models +- Text Classification: 30,000+ models +- Image Generation: 10,000+ models +- Translation: 5,000+ models +- Embeddings: 3,000+ models + +## Best Practices + +1. **Use popular models**: Faster loading and better maintained +2. **Implement timeouts**: Set appropriate timeouts (60-120 seconds) +3. **Cache responses**: Store results to reduce API calls +4. **Handle cold starts**: Implement retry logic for 503 errors +5. **Monitor usage**: Track API calls and costs +6. **Test locally**: Use Hugging Face Transformers library for testing +7. **Read model cards**: Understand model capabilities and limitations +8. 
**Optimize parameters**: Tune settings for your use case + +## Use Cases + +- **Chatbots**: LLM-powered conversational AI +- **Content Generation**: Blog posts, articles, creative writing +- **Image Creation**: Art, illustrations, product images +- **Sentiment Analysis**: Customer feedback analysis +- **Translation**: Multi-language support +- **Transcription**: Meeting notes, podcast transcripts +- **Semantic Search**: Embedding-based search +- **Data Extraction**: NER for document processing +- **Content Moderation**: Text and image classification + +## API Documentation + +- [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index) +- [Supported Tasks](https://huggingface.co/docs/api-inference/supported-tasks) +- [Model Hub](https://huggingface.co/models) +- [Pricing](https://huggingface.co/pricing) + +## Support + +- [Hugging Face Forums](https://discuss.huggingface.co/) +- [Discord Community](https://huggingface.co/join/discord) +- [Documentation](https://huggingface.co/docs) +- [Status Page](https://status.huggingface.co/) diff --git a/servers/huggingface/requirements.txt b/servers/huggingface/requirements.txt new file mode 100644 index 0000000..42f0167 --- /dev/null +++ b/servers/huggingface/requirements.txt @@ -0,0 +1,4 @@ +fastmcp>=2.3.0 +httpx>=0.27.0 +python-dotenv>=1.0.0 +uvicorn>=0.30.0 diff --git a/servers/huggingface/server.json b/servers/huggingface/server.json new file mode 100644 index 0000000..42c980a --- /dev/null +++ b/servers/huggingface/server.json @@ -0,0 +1,65 @@ +{ + "name": "ai.nimbletools/huggingface", + "version": "1.0.0", + "description": "Hugging Face API: LLMs, image generation, text classification, embeddings, and 200,000+ models", + "category": "ai-ml", + "status": "active", + "homepage": "https://github.com/NimbleBrainInc/mcp-registry/tree/main/servers/huggingface", + "repository": { + "url": "https://github.com/NimbleBrainInc/mcp-huggingface", + "source": "github", + "branch": "main" + }, + "websiteUrl": 
"https://huggingface.co/", + "license": "MIT", + "tags": [ + "huggingface", + "llm", + "machine-learning", + "text-generation", + "image-generation", + "stable-diffusion", + "embeddings", + "nlp", + "computer-vision", + "requires-api-key" + ], + "transport": { + "type": "streamable-http", + "url": "https://mcp.nimbletools.ai/mcp" + }, + "environmentVariables": { + "HUGGINGFACE_API_TOKEN": { + "description": "Hugging Face API token (get from https://huggingface.co/settings/tokens)", + "required": true, + "secret": true, + "example": "hf_..." + } + }, + "_meta": { + "display": { + "icon": "https://cdn.simpleicons.org/huggingface", + "color": "#FFD21E" + }, + "deployment": { + "protocol": "http", + "registry": "ghcr.io", + "image": "ghcr.io/nimblebraininc/mcp-huggingface:latest", + "port": 8000, + "mcpPath": "/mcp" + }, + "resources": { + "limits": { + "cpu": "500m", + "memory": "512Mi" + }, + "requests": { + "cpu": "250m", + "memory": "256Mi" + } + }, + "capabilities": { + "tools": true + } + } +} diff --git a/servers/huggingface/server.py b/servers/huggingface/server.py new file mode 100644 index 0000000..9e01025 --- /dev/null +++ b/servers/huggingface/server.py @@ -0,0 +1,567 @@ +""" +Hugging Face MCP Server +Provides tools for accessing Hugging Face Inference API for ML models. 
import os
from typing import Optional, Dict, Any
import httpx
from fastmcp import FastMCP

# Initialize FastMCP server
mcp = FastMCP("Hugging Face MCP Server")

# The API token is read once, at import time, from the environment
HUGGINGFACE_API_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
BASE_URL = "https://api-inference.huggingface.co"


def get_headers() -> Dict[str, str]:
    """
    Build the HTTP headers used for JSON requests to the Hugging Face API.

    Returns:
        Dictionary holding a Bearer ``Authorization`` header and a JSON ``Content-Type``

    Raises:
        ValueError: If HUGGINGFACE_API_TOKEN is unset or empty
    """
    if not HUGGINGFACE_API_TOKEN:
        raise ValueError("HUGGINGFACE_API_TOKEN environment variable is required")
    return {
        "Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}",
        "Content-Type": "application/json",
    }


@mcp.tool()
async def text_generation(
    prompt: str,
    model_id: str = "mistralai/Mistral-7B-Instruct-v0.3",
    max_new_tokens: Optional[int] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    top_k: Optional[int] = None,
    repetition_penalty: Optional[float] = None,
    return_full_text: bool = False
) -> Any:
    """
    Generate text using large language models.

    Args:
        prompt: Input text prompt
        model_id: Model ID (default: 'mistralai/Mistral-7B-Instruct-v0.3')
        max_new_tokens: Maximum tokens to generate (optional)
        temperature: Sampling temperature 0-2 (higher = more random, optional)
        top_p: Nucleus sampling parameter 0-1 (optional)
        top_k: Top-k sampling parameter (optional)
        repetition_penalty: Penalty for repetition (optional)
        return_full_text: Return full text including prompt (default: False)

    Returns:
        The decoded JSON response. The bundled test reads ``[0].generated_text``,
        i.e. a list of generations rather than a single dictionary.

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: meta-llama/Llama-3.2-3B-Instruct, mistralai/Mistral-7B-Instruct-v0.3, google/gemma-2-2b-it
    """
    # Always transmit return_full_text: leaving it out when True would make the
    # result depend on whatever default the serving backend applies.
    parameters: Dict[str, Any] = {"return_full_text": return_full_text}

    # Sampling knobs are forwarded only when the caller set them, so the
    # model's own defaults apply otherwise.
    optional_parameters = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "repetition_penalty": repetition_penalty,
    }
    parameters.update(
        {key: value for key, value in optional_parameters.items() if value is not None}
    )

    payload = {"inputs": prompt, "parameters": parameters}

    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json=payload,
        )
        response.raise_for_status()
        return response.json()
@mcp.tool()
async def text_classification(
    text: str,
    model_id: str = "distilbert-base-uncased-finetuned-sst-2-english"
) -> Any:
    """
    Classify text into categories (sentiment, topic, etc.).

    Args:
        text: Input text to classify
        model_id: Model ID (default: 'distilbert-base-uncased-finetuned-sst-2-english')

    Returns:
        The decoded JSON response with classification labels and scores. The
        bundled test reads ``[0][0].label``, i.e. a nested list, not a dictionary.

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: distilbert-base-uncased-finetuned-sst-2-english (sentiment), facebook/bart-large-mnli (zero-shot)
    """
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json={"inputs": text},
        )
        response.raise_for_status()
        return response.json()


@mcp.tool()
async def token_classification(
    text: str,
    model_id: str = "dslim/bert-base-NER"
) -> Any:
    """
    Token classification for named entity recognition, POS tagging, etc.

    Args:
        text: Input text
        model_id: Model ID (default: 'dslim/bert-base-NER')

    Returns:
        The decoded JSON response with token entities and labels
        (presumably a list of entity records; confirm against the API docs)

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: dslim/bert-base-NER, Jean-Baptiste/roberta-large-ner-english
    """
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json={"inputs": text},
        )
        response.raise_for_status()
        return response.json()
@mcp.tool()
async def question_answering(
    question: str,
    context: str,
    model_id: str = "deepset/roberta-base-squad2"
) -> dict:
    """
    Answer questions based on provided context.

    Args:
        question: Question to answer
        context: Context containing the answer
        model_id: Model ID (default: 'deepset/roberta-base-squad2')

    Returns:
        Dictionary with answer, score, start/end positions

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: deepset/roberta-base-squad2, distilbert-base-cased-distilled-squad
    """
    # Question and context travel together as one structured "inputs" object.
    qa_inputs = {"question": question, "context": context}

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json={"inputs": qa_inputs},
        )
        response.raise_for_status()
        return response.json()


@mcp.tool()
async def summarization(
    text: str,
    model_id: str = "facebook/bart-large-cnn",
    max_length: Optional[int] = None,
    min_length: Optional[int] = None
) -> Any:
    """
    Summarize long text into shorter version.

    Args:
        text: Input text to summarize
        model_id: Model ID (default: 'facebook/bart-large-cnn')
        max_length: Maximum summary length (optional)
        min_length: Minimum summary length (optional)

    Returns:
        The decoded JSON response containing the summary text
        (shape is model-dependent; presumably a list of results, confirm against the API docs)

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: facebook/bart-large-cnn, google/pegasus-xsum, sshleifer/distilbart-cnn-12-6
    """
    payload: Dict[str, Any] = {"inputs": text}

    # Length bounds are forwarded only when given; "parameters" is omitted
    # entirely when neither is set.
    length_bounds = {"max_length": max_length, "min_length": min_length}
    parameters = {key: value for key, value in length_bounds.items() if value is not None}
    if parameters:
        payload["parameters"] = parameters

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json=payload,
        )
        response.raise_for_status()
        return response.json()
@mcp.tool()
async def translation(
    text: str,
    model_id: str = "Helsinki-NLP/opus-mt-en-es"
) -> Any:
    """
    Translate text between languages.

    Args:
        text: Input text to translate
        model_id: Model ID (default: 'Helsinki-NLP/opus-mt-en-es' for English to Spanish)

    Returns:
        The decoded JSON response containing the translated text
        (presumably a list of translation records; confirm against the API docs)

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: Helsinki-NLP/opus-mt-en-es (en->es), Helsinki-NLP/opus-mt-es-en (es->en),
                   Helsinki-NLP/opus-mt-en-fr (en->fr), facebook/mbart-large-50-many-to-many-mmt (multilingual)
    """
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json={"inputs": text},
        )
        response.raise_for_status()
        return response.json()


@mcp.tool()
async def text_to_image(
    prompt: str,
    model_id: str = "black-forest-labs/FLUX.1-dev",
    negative_prompt: Optional[str] = None,
    num_inference_steps: Optional[int] = None,
    guidance_scale: Optional[float] = None
) -> dict:
    """
    Generate images from text prompts.

    Args:
        prompt: Text description of desired image
        model_id: Model ID (default: 'black-forest-labs/FLUX.1-dev')
        negative_prompt: What to avoid in image (optional)
        num_inference_steps: Number of denoising steps (optional, default varies by model)
        guidance_scale: How closely to follow prompt (optional, default varies by model)

    Returns:
        Dictionary with keys 'image' (base64 encoded response body), 'format'
        (always 'base64') and 'content_type' (the response Content-Type header,
        falling back to 'image/jpeg')

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: black-forest-labs/FLUX.1-dev, stabilityai/stable-diffusion-xl-base-1.0,
                   stabilityai/stable-diffusion-2-1, runwayml/stable-diffusion-v1-5
    """
    import base64

    payload: Dict[str, Any] = {"inputs": prompt}

    # Only forward generation settings the caller actually supplied.
    generation_settings = {
        "negative_prompt": negative_prompt,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
    }
    parameters = {key: value for key, value in generation_settings.items() if value is not None}
    if parameters:
        payload["parameters"] = parameters

    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json=payload,
        )
        response.raise_for_status()

        # The image comes back as raw bytes; base64 makes it JSON-safe.
        return {
            "image": base64.b64encode(response.content).decode('utf-8'),
            "format": "base64",
            "content_type": response.headers.get("content-type", "image/jpeg")
        }


@mcp.tool()
async def image_to_text(
    image_base64: str,
    model_id: str = "Salesforce/blip-image-captioning-large"
) -> Any:
    """
    Generate text descriptions from images (image captioning).

    Args:
        image_base64: Base64 encoded image
        model_id: Model ID (default: 'Salesforce/blip-image-captioning-large')

    Returns:
        The decoded JSON response with the generated caption
        (the README example shows a list such as [{'generated_text': ...}])

    Raises:
        ValueError: If the API token is not configured
        binascii.Error: If image_base64 is not valid base64
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: Salesforce/blip-image-captioning-large, nlpconnect/vit-gpt2-image-captioning
    """
    import base64

    # Go through get_headers() so a missing token raises instead of sending
    # "Bearer None"; keep only Authorization because the body is raw bytes, not JSON.
    auth_headers = {"Authorization": get_headers()["Authorization"]}
    image_bytes = base64.b64decode(image_base64)

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=auth_headers,
            content=image_bytes,
        )
        response.raise_for_status()
        return response.json()
@mcp.tool()
async def image_classification(
    image_base64: str,
    model_id: str = "google/vit-base-patch16-224"
) -> Any:
    """
    Classify images into categories.

    Args:
        image_base64: Base64 encoded image
        model_id: Model ID (default: 'google/vit-base-patch16-224')

    Returns:
        The decoded JSON response with classification labels and scores
        (the README example shows a list such as [{'label': ..., 'score': ...}, ...])

    Raises:
        ValueError: If the API token is not configured
        binascii.Error: If image_base64 is not valid base64
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: google/vit-base-patch16-224, microsoft/resnet-50
    """
    import base64

    # Go through get_headers() so a missing token raises instead of sending
    # "Bearer None"; keep only Authorization because the body is raw bytes, not JSON.
    auth_headers = {"Authorization": get_headers()["Authorization"]}
    image_bytes = base64.b64decode(image_base64)

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=auth_headers,
            content=image_bytes,
        )
        response.raise_for_status()
        return response.json()


@mcp.tool()
async def object_detection(
    image_base64: str,
    model_id: str = "facebook/detr-resnet-50"
) -> Any:
    """
    Detect objects in images with bounding boxes.

    Args:
        image_base64: Base64 encoded image
        model_id: Model ID (default: 'facebook/detr-resnet-50')

    Returns:
        The decoded JSON response with detected objects, labels, scores, and bounding boxes
        (the README example shows a list such as [{'label': ..., 'score': ..., 'box': {...}}, ...])

    Raises:
        ValueError: If the API token is not configured
        binascii.Error: If image_base64 is not valid base64
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: facebook/detr-resnet-50, hustvl/yolos-tiny
    """
    import base64

    # Same header handling as the other binary-upload tools: validated token,
    # no JSON Content-Type for a raw image body.
    auth_headers = {"Authorization": get_headers()["Authorization"]}
    image_bytes = base64.b64decode(image_base64)

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=auth_headers,
            content=image_bytes,
        )
        response.raise_for_status()
        return response.json()
@mcp.tool()
async def text_to_speech(
    text: str,
    model_id: str = "facebook/mms-tts-eng"
) -> dict:
    """
    Convert text to speech audio.

    Args:
        text: Input text to synthesize
        model_id: Model ID (default: 'facebook/mms-tts-eng')

    Returns:
        Dictionary with keys 'audio' (base64 encoded response body), 'format'
        (always 'base64') and 'content_type' (the response Content-Type header,
        falling back to 'audio/flac')

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: facebook/mms-tts-eng, espnet/kan-bayashi_ljspeech_vits
    """
    import base64

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json={"inputs": text},
        )
        response.raise_for_status()

        # The audio comes back as raw bytes; base64 makes it JSON-safe.
        return {
            "audio": base64.b64encode(response.content).decode('utf-8'),
            "format": "base64",
            "content_type": response.headers.get("content-type", "audio/flac")
        }


@mcp.tool()
async def automatic_speech_recognition(
    audio_base64: str,
    model_id: str = "openai/whisper-large-v3"
) -> dict:
    """
    Transcribe audio to text (speech recognition).

    Args:
        audio_base64: Base64 encoded audio file
        model_id: Model ID (default: 'openai/whisper-large-v3')

    Returns:
        Dictionary with transcribed text (the README example shows {'text': ...})

    Raises:
        ValueError: If the API token is not configured
        binascii.Error: If audio_base64 is not valid base64
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: openai/whisper-large-v3, openai/whisper-medium, facebook/wav2vec2-base-960h
    """
    import base64

    # Go through get_headers() so a missing token raises instead of sending
    # "Bearer None"; keep only Authorization because the body is raw bytes, not JSON.
    auth_headers = {"Authorization": get_headers()["Authorization"]}
    audio_bytes = base64.b64decode(audio_base64)

    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=auth_headers,
            content=audio_bytes,
        )
        response.raise_for_status()
        return response.json()
@mcp.tool()
async def sentence_similarity(
    source_sentence: str,
    sentences: list[str],
    model_id: str = "sentence-transformers/all-MiniLM-L6-v2"
) -> Any:
    """
    Compute similarity between sentences using embeddings.

    Args:
        source_sentence: Reference sentence
        sentences: List of sentences to compare
        model_id: Model ID (default: 'sentence-transformers/all-MiniLM-L6-v2')

    Returns:
        The decoded JSON response with similarity scores. The bundled test reads
        ``[0]`` and the README example shows a flat list such as [0.95, 0.23, 0.65].

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: sentence-transformers/all-MiniLM-L6-v2, sentence-transformers/all-mpnet-base-v2
    """
    # Reference sentence and candidates travel together as one "inputs" object.
    comparison = {"source_sentence": source_sentence, "sentences": sentences}

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json={"inputs": comparison},
        )
        response.raise_for_status()
        return response.json()


@mcp.tool()
async def fill_mask(
    text: str,
    model_id: str = "bert-base-uncased"
) -> Any:
    """
    Fill in masked words in text.

    Args:
        text: Input text containing the model's mask token. BERT-style models
            use [MASK]; other families differ (RoBERTa models use <mask>), so
            check the model card when changing model_id.
        model_id: Model ID (default: 'bert-base-uncased')

    Returns:
        The decoded JSON response with predicted tokens and scores
        (the README example shows a list such as [{'token_str': ..., 'score': ...}, ...])

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: bert-base-uncased, roberta-base, distilbert-base-uncased

    Example: "Paris is the [MASK] of France."
    """
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json={"inputs": text},
        )
        response.raise_for_status()
        return response.json()
@mcp.tool()
async def feature_extraction(
    text: str,
    model_id: str = "sentence-transformers/all-MiniLM-L6-v2"
) -> Any:
    """
    Get embeddings (feature vectors) for text.

    Args:
        text: Input text
        model_id: Model ID (default: 'sentence-transformers/all-MiniLM-L6-v2')

    Returns:
        The decoded JSON response with embedding vectors
        (the README example shows a nested list such as [[0.012, -0.034, ...]])

    Raises:
        ValueError: If the API token is not configured
        httpx.HTTPStatusError: If the API answers with a non-2xx status

    Popular models: sentence-transformers/all-MiniLM-L6-v2, sentence-transformers/all-mpnet-base-v2,
                   BAAI/bge-small-en-v1.5
    """
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            f"{BASE_URL}/models/{model_id}",
            headers=get_headers(),
            json={"inputs": text},
        )
        response.raise_for_status()
        return response.json()


if __name__ == "__main__":
    # Direct execution starts the server with FastMCP's default transport.
    mcp.run()