diff --git a/examples/cookbook/smolagents/.env.example b/examples/cookbook/smolagents/.env.example new file mode 100644 index 00000000..a3968f8f --- /dev/null +++ b/examples/cookbook/smolagents/.env.example @@ -0,0 +1,7 @@ +# Moss credentials — get these from https://moss.dev +MOSS_PROJECT_ID=your_project_id +MOSS_PROJECT_KEY=your_project_key +MOSS_INDEX_NAME=your_index_name + +# HuggingFace token — required for gated models (e.g. Llama-3) +HUGGING_FACE_HUB_TOKEN=your_hf_token diff --git a/examples/cookbook/smolagents/README.md b/examples/cookbook/smolagents/README.md new file mode 100644 index 00000000..0a4095fc --- /dev/null +++ b/examples/cookbook/smolagents/README.md @@ -0,0 +1,100 @@ +# Moss + Smolagents Cookbook + +Use [Moss](https://moss.dev) semantic search as a retrieval tool for [Smolagents](https://huggingface.co/docs/smolagents) agents. + +## Why Moss with Smolagents? + +Traditional vector databases add 200–500 ms per retrieval hop. Moss loads index and model weights directly into your application process, delivering **sub-10 ms** search — fast enough that the retrieval step disappears from the agent's latency budget. + +## Installation + +We recommend [uv](https://docs.astral.sh/uv/) for dependency management. + +```bash +uv sync +``` + +Or install dependencies directly: + +```bash +uv pip install smolagents moss python-dotenv +``` + +## Setup + +Copy the example env file and fill in your credentials: + +```bash +cp .env.example .env +``` + +Required variables: + +```env +MOSS_PROJECT_ID=your_project_id +MOSS_PROJECT_KEY=your_project_key +MOSS_INDEX_NAME=your_index_name +``` + +`HUGGING_FACE_HUB_TOKEN` is needed if the chosen model requires authentication. 
+ +## Files + +| File | Purpose | +|------|---------| +| `tool.py` | `MossRetrievalTool` — the reusable smolagents `Tool` subclass | +| `moss_smol_agent_demo.py` | End-to-end demo: load index, build agent, run a question | +| `test_integration.py` | Unit tests with mocked Moss client | + +## Running the demo + +```bash +uv run moss_smol_agent_demo.py +``` + +## How it works + +### Loading the index + +The index must be pulled into local memory **once** before the agent starts. This is the step that switches retrieval from cloud-round-trip speed to local speed: + +```python +asyncio.run(client.load_index("my-index")) +``` + +Call this in your setup/startup code, not inside the tool, so the cost is paid once. + +### Async / sync bridge + +Smolagents' `Tool.forward()` is synchronous, but `MossClient` is async. The tool solves this with a **persistent event loop running in a daemon thread**, started in `__init__`: + +```python +self._loop = asyncio.new_event_loop() +self._thread = threading.Thread(target=self._loop.run_forever, daemon=True) +self._thread.start() +``` + +Each `forward()` call submits the coroutine to that loop and blocks until it completes: + +```python +asyncio.run_coroutine_threadsafe(coro, self._loop).result() +``` + +This approach is better than `asyncio.run()` for two reasons: +- **No per-call overhead** — creating and tearing down an event loop on every search would add latency, defeating the purpose of local retrieval. +- **Works in Jupyter / async frameworks** — `asyncio.run()` raises `RuntimeError` when called from an already-running loop; `run_coroutine_threadsafe` does not. + +### Metadata filtering + +Pass structured filters using the Moss filter DSL: + +```python +metadata_filter = { + "$and": [ + {"field": "category", "condition": {"$eq": "refunds"}}, + {"field": "price", "condition": {"$lt": 50}}, + ] +} +``` + +Available operators: `$eq`, `$ne`, `$gt`, `$gte`, `$lt`, `$lte`, `$in`, `$and`, `$or`. 
diff --git a/examples/cookbook/smolagents/moss_smol_agent_demo.py b/examples/cookbook/smolagents/moss_smol_agent_demo.py
new file mode 100644
index 00000000..5ed2d381
--- /dev/null
+++ b/examples/cookbook/smolagents/moss_smol_agent_demo.py
@@ -0,0 +1,63 @@
+"""End-to-end demo: answer a question with a smolagents agent backed by Moss."""
+
+import asyncio
+import os
+
+from dotenv import load_dotenv
+from smolagents import CodeAgent, InferenceClientModel
+
+from moss import MossClient
+from tool import MossRetrievalTool
+
+load_dotenv()
+
+
+def main():
+    """Load the configured Moss index, build a CodeAgent, and answer one question.
+
+    Raises:
+        EnvironmentError: If MOSS_PROJECT_ID, MOSS_PROJECT_KEY, or
+            MOSS_INDEX_NAME is unset or empty.
+    """
+    project_id = os.getenv("MOSS_PROJECT_ID")
+    project_key = os.getenv("MOSS_PROJECT_KEY")
+    index_name = os.getenv("MOSS_INDEX_NAME")
+
+    if not all([project_id, project_key, index_name]):
+        raise EnvironmentError(
+            "Please set MOSS_PROJECT_ID, MOSS_PROJECT_KEY, and MOSS_INDEX_NAME "
+            "in your environment or .env file."
+        )
+
+    client = MossClient(project_id, project_key)
+
+    # Load the index into local memory before the agent runs.
+    # This one-time setup is what enables sub-10ms retrieval inside the agent loop.
+    # NOTE(review): this runs on a temporary asyncio.run() loop, while queries run
+    # on the tool's own loop -- confirm MossClient keeps no loop-bound state.
+    print(f"Loading index '{index_name}' into local memory...")
+    asyncio.run(client.load_index(index_name))
+    print("Index loaded.\n")
+
+    retrieval_tool = MossRetrievalTool(client, index_name)
+    try:
+        # InferenceClientModel uses the HuggingFace Inference API.
+        # Set HUGGING_FACE_HUB_TOKEN in your .env if the model requires authentication.
+        model = InferenceClientModel("meta-llama/Llama-3.3-70B-Instruct")
+        agent = CodeAgent(tools=[retrieval_tool], model=model, add_base_tools=True)
+
+        question = "What is the policy for processing refunds for digital goods?"
+        print(f"Question: {question}")
+        print("-" * 50)
+
+        response = agent.run(question)
+
+        print("\n--- Agent Response ---")
+        print(response)
+    finally:
+        # Stop the tool's background event loop thread even if the agent fails.
+        retrieval_tool.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/cookbook/smolagents/pyproject.toml b/examples/cookbook/smolagents/pyproject.toml
new file mode 100644
index 00000000..3b2d9e8b
--- /dev/null
+++ b/examples/cookbook/smolagents/pyproject.toml
@@ -0,0 +1,30 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "smolagents-moss"
+version = "0.1.0"
+description = "Smolagents integration for Moss semantic search"
+readme = "README.md"
+requires-python = ">=3.11"
+license = { text = "BSD-2-Clause" }
+authors = [
+    { name = "InferEdge Inc.", email = "contact@moss.dev" }
+]
+dependencies = [
+    "smolagents",
+    "moss>=1.0.0",
+    "python-dotenv",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["tool.py"]
+
+[tool.hatch.build.targets.sdist]
+include = [
+    "README.md",
+    "tool.py",
+    "moss_smol_agent_demo.py",
+    ".env.example",
+]
diff --git a/examples/cookbook/smolagents/test_integration.py b/examples/cookbook/smolagents/test_integration.py
new file mode 100644
index 00000000..6d687d82
--- /dev/null
+++ b/examples/cookbook/smolagents/test_integration.py
@@ -0,0 +1,87 @@
+"""Unit tests for MossRetrievalTool using a mocked Moss client."""
+
+import unittest
+from unittest.mock import MagicMock, patch
+
+from tool import MossRetrievalTool
+
+
+class TestMossRetrievalTool(unittest.TestCase):
+    """Tests for MossRetrievalTool; forward() tests patch _run_async."""
+
+    def setUp(self):
+        self.mock_client = MagicMock()
+        self.tool = MossRetrievalTool(self.mock_client, "test-index")
+
+    def tearDown(self):
+        # Stop the background event loop thread started by the tool's __init__.
+        self.tool.close()
+
+    @patch.object(MossRetrievalTool, "_run_async")
+    def test_forward_formats_results(self, mock_run_async):
+        mock_docs = [
+            MagicMock(id="d1", text="First result content", score=0.9),
+            MagicMock(id="d2", text="Second result content", score=0.8),
+        ]
+        mock_run_async.return_value = MagicMock(docs=mock_docs)
+
+        result = self.tool.forward("test query", top_k=2)
+
+        self.assertIn("Result ID: d1", result)
+        self.assertIn("First result content", result)
+        self.assertIn("Score: 0.900", result)
+        self.assertIn("Result ID: d2", result)
+        self.assertIn("Second result content", result)
+        self.assertIn("Score: 0.800", result)
+
+    @patch.object(MossRetrievalTool, "_run_async")
+    def test_forward_empty_results(self, mock_run_async):
+        mock_run_async.return_value = MagicMock(docs=[])
+        result = self.tool.forward("empty query")
+        self.assertEqual(result, "")
+
+    @patch("tool.QueryOptions")
+    @patch.object(MossRetrievalTool, "_run_async")
+    def test_forward_passes_metadata_filter(self, mock_run_async, mock_query_options):
+        mock_run_async.return_value = MagicMock(docs=[])
+        filt = {"$and": [{"field": "category", "condition": {"$eq": "refunds"}}]}
+        self.tool.forward("query", top_k=3, metadata_filter=filt)
+
+        # top_k and the filter must reach QueryOptions unchanged, and the resulting
+        # options object must be what client.query receives.
+        mock_query_options.assert_called_once_with(top_k=3, filter=filt)
+        self.mock_client.query.assert_called_once_with(
+            "test-index", "query", mock_query_options.return_value
+        )
+        mock_run_async.assert_called_once_with(self.mock_client.query.return_value)
+
+    @patch("tool.QueryOptions")
+    @patch.object(MossRetrievalTool, "_run_async")
+    def test_forward_defaults_top_k_when_none(self, mock_run_async, mock_query_options):
+        mock_run_async.return_value = MagicMock(docs=[])
+        self.tool.forward("query", top_k=None)
+        mock_query_options.assert_called_once_with(top_k=5, filter=None)
+
+    @patch.object(MossRetrievalTool, "_run_async")
+    def test_forward_propagates_errors(self, mock_run_async):
+        mock_run_async.side_effect = RuntimeError("connection failed")
+        # assertRaises(msg=...) only customizes the failure message; the regex
+        # variant actually checks the exception text.
+        with self.assertRaisesRegex(RuntimeError, "connection failed"):
+            self.tool.forward("error query")
+
+    def test_close_is_idempotent(self):
+        self.tool.close()
+        self.tool.close()  # tearDown will call it a third time
+        self.assertFalse(self.tool._thread.is_alive())
+
+    def test_tool_schema(self):
+        self.assertEqual(self.tool.name, "moss_retrieval")
+        self.assertIn("query", self.tool.inputs)
+        self.assertIn("top_k", self.tool.inputs)
+        self.assertIn("metadata_filter", self.tool.inputs)
+        self.assertEqual(self.tool.output_type, "string")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/examples/cookbook/smolagents/tool.py b/examples/cookbook/smolagents/tool.py
new file mode 100644
index 00000000..0951ac55
--- /dev/null
+++ b/examples/cookbook/smolagents/tool.py
@@ -0,0 +1,114 @@
+"""Smolagents retrieval tool backed by a Moss index."""
+
+import asyncio
+import threading
+from typing import Any, Dict, Optional
+
+from smolagents import Tool
+from moss import MossClient, QueryOptions
+
+
+class MossRetrievalTool(Tool):
+    """Smolagents Tool that runs semantic search against a locally loaded Moss index.
+
+    ``forward`` is synchronous but the Moss client call it makes is a coroutine,
+    so each instance starts its own event loop in a daemon thread and runs
+    queries there. Call ``close()`` to stop that thread when done.
+    """
+
+    name = "moss_retrieval"
+    description = (
+        "Finds relevant information from a knowledge base using semantic search. "
+        "Use this when the answer is likely contained in indexed documents."
+    )
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The search query string.",
+        },
+        "top_k": {
+            "type": "integer",
+            "description": "Number of results to return (default: 5).",
+            "nullable": True,
+            "default": 5,
+        },
+        "metadata_filter": {
+            "type": "object",
+            "description": (
+                "Optional filter using the Moss filter DSL. "
+                "Example: {'$and': [{'field': 'category', 'condition': {'$eq': 'refunds'}}]}"
+            ),
+            "nullable": True,
+        },
+    }
+    output_type = "string"
+
+    def __init__(self, client: MossClient, index_name: str):
+        """Store the client and index name and start the background event loop.
+
+        Args:
+            client: Moss client whose ``query`` coroutine is used for searches.
+            index_name: Name of the index passed to every query.
+        """
+        super().__init__()
+        self.client = client
+        self.index_name = index_name
+        # A persistent event loop in a daemon thread avoids two problems:
+        # 1. Per-call loop creation/teardown overhead (kills sub-10ms latency).
+        # 2. RuntimeError when forward() is called from an already-running loop
+        #    (Jupyter notebooks, async frameworks).
+        self._loop = asyncio.new_event_loop()
+        self._thread = threading.Thread(target=self._loop.run_forever, daemon=True)
+        self._thread.start()
+
+    def close(self, timeout: Optional[float] = 1.0) -> None:
+        """Stop the background event loop thread and close the loop.
+
+        Safe to call more than once. ``forward`` must not be used afterwards.
+
+        Args:
+            timeout: Seconds to wait for the loop thread to exit.
+        """
+        if self._loop.is_closed():
+            return
+        self._loop.call_soon_threadsafe(self._loop.stop)
+        self._thread.join(timeout=timeout)
+        # Closing a loop that is still running raises RuntimeError, so only close
+        # it once the thread has actually finished.
+        if not self._thread.is_alive():
+            self._loop.close()
+
+    def _run_async(self, coro) -> Any:
+        """Run ``coro`` on the background loop and block until it returns."""
+        return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
+
+    def forward(
+        self,
+        query: str,
+        top_k: Optional[int] = 5,
+        metadata_filter: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        """Query the index and format the matches as one string.
+
+        Args:
+            query: The search query string.
+            top_k: Number of results to return; ``None`` means the default of 5.
+            metadata_filter: Optional Moss filter DSL dict passed to the query.
+
+        Returns:
+            One "--- Result ID ... ---" section per document, separated by blank
+            lines, or an empty string when there are no results.
+
+        Raises:
+            Exception: Whatever the underlying query raises is propagated.
+        """
+        # top_k is declared nullable in ``inputs``, so None may be passed
+        # explicitly; treat it the same as omitting the argument.
+        if top_k is None:
+            top_k = 5
+        options = QueryOptions(top_k=top_k, filter=metadata_filter)
+        results = self._run_async(self.client.query(self.index_name, query, options))
+        return "\n\n".join(
+            f"--- Result ID: {doc.id} (Score: {doc.score:.3f}) ---\n{doc.text}"
+            for doc in results.docs
+        )