feat: support use ollama as embedding provider(#138) (#139)

dishuostec · web-flow · commit 3f1911e41580 · 2025-10-28T22:52:38.000+08:00
* support use ollama as embedding provider

* add ollama embedding example config and update readme
diff --git a/src/server/api/example_config/ollama_embedding/config.yaml b/src/server/api/example_config/ollama_embedding/config.yaml
@@ -0,0 +1,11 @@
+llm_api_key: XXX
+llm_base_url: https://api.openai.com/v1/
+best_llm_model: gpt-4o
+
+embedding_provider: ollama
+embedding_api_key: ollama
+embedding_base_url: "http://127.0.0.1:11434/" # Ollama native API root (the server calls /api/embed); do NOT append "v1"
+embedding_model: "qwen3-embedding:4b-q4_K_M"
+embedding_dim: 2560
+
+language: en
diff --git a/src/server/api/memobase_server/env.py b/src/server/api/memobase_server/env.py
@@ -102,7 +102,7 @@ class Config:
     summary_llm_model: str = None
 
     enable_event_embedding: bool = True
-    embedding_provider: Literal["openai", "jina"] = "openai"
+    embedding_provider: Literal["openai", "jina", "ollama"] = "openai"
     embedding_api_key: str = None
     embedding_base_url: str = None
     embedding_dim: int = 1536
diff --git a/src/server/api/memobase_server/llms/embeddings/__init__.py b/src/server/api/memobase_server/llms/embeddings/__init__.py
@@ -9,10 +9,11 @@
 from .jina_embedding import jina_embedding
 from .openai_embedding import openai_embedding
 from .lmstudio_embedding import lmstudio_embedding
+from .ollama_embedding import ollama_embedding
 from ...telemetry import telemetry_manager, HistogramMetricName, CounterMetricName
 from ...utils import get_encoded_tokens
 
-FACTORIES = {"openai": openai_embedding, "jina": jina_embedding, "lmstudio": lmstudio_embedding}
+FACTORIES = {"openai": openai_embedding, "jina": jina_embedding, "lmstudio": lmstudio_embedding, "ollama": ollama_embedding}
 assert (
     CONFIG.embedding_provider in FACTORIES
 ), f"Unsupported embedding provider: {CONFIG.embedding_provider}"
diff --git a/src/server/api/memobase_server/llms/embeddings/ollama_embedding.py b/src/server/api/memobase_server/llms/embeddings/ollama_embedding.py
@@ -0,0 +1,34 @@
+import numpy as np
+from typing import Literal
+from ...errors import ExternalAPIError
+from ...env import CONFIG, LOG
+from .utils import get_ollama_async_client_instance
+
+OLLAMA_TASK = {  # phase -> task label; the "task" field is not currently sent
+    "query": "retrieval.query",
+    "document": "retrieval.passage",
+}
+
+
+async def ollama_embedding(
+    model: str, texts: list[str], phase: Literal["query", "document"] = "document"
+) -> np.ndarray:
+    """Embed `texts` by POSTing to `/api/embed` on the shared Ollama client.
+
+    `phase` is accepted but not sent. Raises ExternalAPIError on a non-200
+    response; timing fields missing from the reply are logged as None.
+    """
+    client = get_ollama_async_client_instance()
+    payload = {
+        "model": model,
+        "input": texts,
+        "truncate": True,
+        "dimensions": CONFIG.embedding_dim,
+    }
+    response = await client.post("/api/embed", json=payload, timeout=20)
+    if response.status_code != 200:
+        raise ExternalAPIError(f"Failed to embed texts: {response.text}")
+    data = response.json()
+    load_duration, total_duration = data.get("load_duration"), data.get("total_duration")
+    LOG.info(f"Ollama embedding, {model}, {load_duration}/{total_duration}")
+    return np.array(data["embeddings"])
diff --git a/src/server/api/memobase_server/llms/embeddings/utils.py b/src/server/api/memobase_server/llms/embeddings/utils.py
@@ -5,6 +5,7 @@
 _global_openai_async_client = None
 _global_jina_async_client = None
 _global_lmstudio_async_client = None
+_global_ollama_async_client = None
 
 
 def get_openai_async_client_instance() -> AsyncOpenAI:
@@ -34,3 +35,13 @@ def get_lmstudio_async_client_instance() -> AsyncClient:
             headers={"Authorization": f"Bearer {CONFIG.embedding_api_key}"},
         )
     return _global_lmstudio_async_client
+
+def get_ollama_async_client_instance() -> AsyncClient:
+    """Return the module-level Ollama client, creating it on first call."""
+    global _global_ollama_async_client
+    if _global_ollama_async_client is None:
+        auth = {"Authorization": f"Bearer {CONFIG.embedding_api_key}"}
+        client = AsyncClient(base_url=CONFIG.embedding_base_url, headers=auth)
+        _global_ollama_async_client = client
+    return _global_ollama_async_client
+
diff --git a/src/server/readme.md b/src/server/readme.md
@@ -29,14 +29,15 @@ Memobase uses a single  `config.yaml` to initialize the server. It contains the
 By default, Memobase enables user profile and event memory with filter ability. That means running a Memobase server requires you to have below things:
 
 - **LLM API**: You must fill the OpenAI API Key in `llm_api_key` of `config.yaml`.Or you can change `llm_base_url` to any OpenAI-SDK-Compatible service(via [vllm](https://github.com/vllm-project/vllm), [Ollama](../../assets/tutorials/ollama+memobase/readme.md),...). Alternatively, you can set `llm_api_key` and `llm_base_url` using environment variables `MEMOBASE_LLM_API_KEY` and `MEMOBASE_LLM_BASE_URL`
-- **Embedding API**: Memobase supports OpenAI-Compatible SDK and [Jina Embedding](https://jina.ai/models/jina-embeddings-v3/). Memobase uses embedding API to retrieve related user events. If you don't have a embedding API, you can set `enable_event_embedding: false` in `config.yaml`
+- **Embedding API**: Memobase supports OpenAI-Compatible SDK, [Jina Embedding](https://jina.ai/models/jina-embeddings-v3/) and [Ollama Embedding](https://docs.ollama.com/api#generate-embeddings). Memobase uses the embedding API to retrieve related user events. If you don't have an embedding API, you can set `enable_event_embedding: false` in `config.yaml`
 
 We have some example `config.yaml` in `examplel_config`:
 
 - [`profile_for_assistant`](./api/example_config/profile_for_education),  [`profile_for_education`](./api/example_config/profile_for_education),  [`profile_for_companion`](./api/example_config/profile_for_companion)  are three similar configs in term of structure, but for different user cases.
 - [`event_tag`](./api/example_config/event_tag) is a feature to tracking temporal attributes of users. [doc](https://docs.memobase.io/features/event/event_tag)
 - [`only_strict_profile`](./api/example_config/only_strict_profile): disable all other features, only collect the profiles you design.
 - [`jina_embedding`](./api/example_config/jina_embedding) uses Jina exmbedding for event search.
+- [`ollama_embedding`](./api/example_config/ollama_embedding) uses Ollama embedding for event search.