From 3fa3a4d24c199597b0fb34c5cf35c9508ba583d2 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 24 Jul 2025 15:56:08 +0900 Subject: [PATCH 1/7] add cohere embedding provider --- nemoguardrails/embeddings/providers/cohere.py | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 nemoguardrails/embeddings/providers/cohere.py diff --git a/nemoguardrails/embeddings/providers/cohere.py b/nemoguardrails/embeddings/providers/cohere.py new file mode 100644 index 000000000..afda11aaf --- /dev/null +++ b/nemoguardrails/embeddings/providers/cohere.py @@ -0,0 +1,118 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import asyncio +from contextvars import ContextVar +from typing import List + +from .base import EmbeddingModel + +# We set the Cohere async client in an asyncio context variable because we need it +# to be scoped at the asyncio loop level. The client caches it somewhere, and if the loop +# is changed, it will fail. +async_client_var: ContextVar = ContextVar("async_client", default=None) + + +class CohereEmbeddingModel(EmbeddingModel): + """Embedding model using Cohere API. + + Args: + embedding_model (str): The name of the embedding model. + input_type (str): The type of input for the embedding model, default is "search_document". + "search_document", "search_query", "classification", "clustering", "image" + + Attributes: + model (str): The name of the embedding model. + embedding_size (int): The size of the embeddings. + + Methods: + encode: Encode a list of documents into embeddings. + """ + + engine_name = "cohere" + + def __init__( + self, + embedding_model: str, + input_type: str = "search_document", + **kwargs, + ): + try: + import cohere + from cohere import AsyncClient, Client + except ImportError: + raise ImportError( + "Could not import cohere, please install it with " + "`pip install cohere`." + ) + + self.model = embedding_model + self.input_type = input_type + self.client = cohere.Client(**kwargs) + + self.embedding_size_dict = { + "embed-v4.0": 1536, + "embed-english-v3.0": 1024, + "embed-english-light-v3.0": 384, + "embed-multilingual-v3.0": 1024, + "embed-multilingual-light-v3.0": 384, + } + + if self.model in self.embedding_size_dict: + self.embedding_size = self.embedding_size_dict[self.model] + else: + # Perform a first encoding to get the embedding size + self.embedding_size = len(self.encode(["test"])[0]) + + async def encode_async(self, documents: List[str]) -> List[List[float]]: + """Encode a list of documents into embeddings. + + Args: + documents (List[str]): The list of documents to be encoded. + + Returns: + List[List[float]]: The encoded embeddings. + + """ + loop = asyncio.get_running_loop() + embeddings = await loop.run_in_executor(None, self.encode, documents) + + # NOTE: The async implementation below has some edge cases because of + # httpx and async and returns "Event loop is closed." errors. Falling back to + # a thread-based implementation for now. + + # # We do lazy initialization of the async client to make sure it's on the correct loop + # async_client = async_client_var.get() + # if async_client is None: + # async_client = AsyncClient() + # async_client_var.set(async_client) + # + # # Make embedding request to Cohere API + # embeddings = await async_client.embed(texts=documents, model=self.model, input_type=self.input_type).embeddings + + return embeddings + + def encode(self, documents: List[str]) -> List[List[float]]: + """Encode a list of documents into embeddings. + + Args: + documents (List[str]): The list of documents to be encoded. + + Returns: + List[List[float]]: The encoded embeddings. + + """ + + # Make embedding request to Cohere API + return self.client.embed(texts=documents, model=self.model, input_type=self.input_type).embeddings From 3063df5d97e545c2de805bcd2fa3fed3cca66258 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 24 Jul 2025 16:00:48 +0900 Subject: [PATCH 2/7] add annotation about setting api_key --- nemoguardrails/embeddings/providers/cohere.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nemoguardrails/embeddings/providers/cohere.py b/nemoguardrails/embeddings/providers/cohere.py index afda11aaf..cf7f2c4d8 100644 --- a/nemoguardrails/embeddings/providers/cohere.py +++ b/nemoguardrails/embeddings/providers/cohere.py @@ -25,7 +25,12 @@ class CohereEmbeddingModel(EmbeddingModel): - """Embedding model using Cohere API. + """ + Embedding model using Cohere API. + + To use, you must have either: + 1. The ``COHERE_API_KEY`` environment variable set with your API key, or + 2. Pass your API key using the api_key kwarg to the Cohere constructor. Args: embedding_model (str): The name of the embedding model. From dd62471b535e915e3d57ce683061ca14cedeb9b7 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 24 Jul 2025 16:02:18 +0900 Subject: [PATCH 3/7] add configuration-guide.md --- docs/user-guides/configuration-guide.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user-guides/configuration-guide.md b/docs/user-guides/configuration-guide.md index f9947cd18..b7fd97e19 100644 --- a/docs/user-guides/configuration-guide.md +++ b/docs/user-guides/configuration-guide.md @@ -538,6 +538,7 @@ The following tables lists the supported embedding providers: | OpenAI | `openai` | `text-embedding-ada-002`, etc. | | SentenceTransformers | `SentenceTransformers` | `all-MiniLM-L6-v2`, etc. | | NVIDIA AI Endpoints | `nvidia_ai_endpoints` | `nv-embed-v1`, etc. | +| Cohere | `cohere` | `embed-multilingual-v3.0`, etc. | ```{note} You can use any of the supported models for any of the supported embedding providers. From b1539a48af15c6a214bad1a041f1f37b4e15d946 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 24 Jul 2025 16:03:26 +0900 Subject: [PATCH 4/7] add cohere at provider's init --- nemoguardrails/embeddings/providers/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemoguardrails/embeddings/providers/__init__.py b/nemoguardrails/embeddings/providers/__init__.py index c9a8f2896..11ce3471e 100644 --- a/nemoguardrails/embeddings/providers/__init__.py +++ b/nemoguardrails/embeddings/providers/__init__.py @@ -18,7 +18,7 @@ from typing import Optional, Type -from . import fastembed, nim, openai, sentence_transformers +from . import fastembed, nim, openai, sentence_transformers, cohere from .base import EmbeddingModel from .registry import EmbeddingProviderRegistry @@ -68,6 +68,7 @@ def register_embedding_provider( register_embedding_provider(sentence_transformers.SentenceTransformerEmbeddingModel) register_embedding_provider(nim.NIMEmbeddingModel) register_embedding_provider(nim.NVIDIAAIEndpointsEmbeddingModel) +register_embedding_provider(cohere.CohereEmbeddingModel) def init_embedding_model( From 825f916d1aeeb3cab90b24527a9ab0f392480ca4 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 24 Jul 2025 16:07:19 +0900 Subject: [PATCH 5/7] add test code --- .../with_cohere_embeddings/config.co | 12 +++ .../with_cohere_embeddings/config.yml | 8 ++ tests/test_embeddings_cohere.py | 97 +++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 tests/test_configs/with_cohere_embeddings/config.co create mode 100644 tests/test_configs/with_cohere_embeddings/config.yml create mode 100644 tests/test_embeddings_cohere.py diff --git a/tests/test_configs/with_cohere_embeddings/config.co b/tests/test_configs/with_cohere_embeddings/config.co new file mode 100644 index 000000000..56035e40c --- /dev/null +++ b/tests/test_configs/with_cohere_embeddings/config.co @@ -0,0 +1,12 @@ +define user ask capabilities + "What can you do?" + "What can you help me with?" + "tell me what you can do" + "tell me about you" + +define bot inform capabilities + "I am an AI assistant that helps answer questions." + +define flow + user ask capabilities + bot inform capabilities diff --git a/tests/test_configs/with_cohere_embeddings/config.yml b/tests/test_configs/with_cohere_embeddings/config.yml new file mode 100644 index 000000000..71627761c --- /dev/null +++ b/tests/test_configs/with_cohere_embeddings/config.yml @@ -0,0 +1,8 @@ +models: + - type: main + engine: openai + model: gpt-3.5-turbo-instruct + + - type: embeddings + engine: cohere + model: embed-multilingual-v3.0 diff --git a/tests/test_embeddings_cohere.py b/tests/test_embeddings_cohere.py new file mode 100644 index 000000000..591c6d5b6 --- /dev/null +++ b/tests/test_embeddings_cohere.py @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest + +from nemoguardrails import LLMRails, RailsConfig + +try: + from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel +except ImportError: + # Ignore this if running in test environment when cohere not installed. + CohereEmbeddingModel = None + +CONFIGS_FOLDER = os.path.join(os.path.dirname(__file__), ".", "test_configs") + +LIVE_TEST_MODE = os.environ.get("LIVE_TEST") + + +@pytest.fixture +def app(): + """Load the configuration where we replace FastEmbed with Cohere.""" + config = RailsConfig.from_path( + os.path.join(CONFIGS_FOLDER, "with_cohere_embeddings") + ) + + return LLMRails(config) + + +@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.") +def test_custom_llm_registration(app): + assert isinstance( + app.llm_generation_actions.flows_index._model, CohereEmbeddingModel + ) + + +@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.") +@pytest.mark.asyncio +async def test_live_query(): + config = RailsConfig.from_path( + os.path.join(CONFIGS_FOLDER, "with_cohere_embeddings") + ) + app = LLMRails(config) + + result = await app.generate_async( + messages=[{"role": "user", "content": "tell me what you can do"}] + ) + + assert result == { + "role": "assistant", + "content": "I am an AI assistant that helps answer questions.", + } + + +@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.") +@pytest.mark.asyncio +def test_live_query(app): + result = app.generate( + messages=[{"role": "user", "content": "tell me what you can do"}] + ) + + assert result == { + "role": "assistant", + "content": "I am an AI assistant that helps answer questions.", + } + + +@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.") +def test_sync_embeddings(): + model = CohereEmbeddingModel("embed-multilingual-v3.0") + + result = model.encode(["test"]) + + assert len(result[0]) == 1024 + + +@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.") +@pytest.mark.asyncio +async def test_async_embeddings(): + model = CohereEmbeddingModel("embed-multilingual-v3.0") + + result = await model.encode_async(["test"]) + + assert len(result[0]) == 1024 \ No newline at end of file From cae3b914171bedbeeedcd98cd8e8506ea4418cb0 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 24 Jul 2025 16:11:10 +0900 Subject: [PATCH 6/7] add blank line --- tests/test_embeddings_cohere.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_embeddings_cohere.py b/tests/test_embeddings_cohere.py index 591c6d5b6..04b351d6b 100644 --- a/tests/test_embeddings_cohere.py +++ b/tests/test_embeddings_cohere.py @@ -94,4 +94,4 @@ async def test_async_embeddings(): result = await model.encode_async(["test"]) - assert len(result[0]) == 1024 \ No newline at end of file + assert len(result[0]) == 1024 From 4605ecacc895c2e9c30f24c793d4f240f108afe1 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Wed, 30 Jul 2025 15:31:46 +0900 Subject: [PATCH 7/7] run pre-commit --- nemoguardrails/embeddings/providers/__init__.py | 2 +- nemoguardrails/embeddings/providers/cohere.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/nemoguardrails/embeddings/providers/__init__.py b/nemoguardrails/embeddings/providers/__init__.py index 11ce3471e..d8f3d6f16 100644 --- a/nemoguardrails/embeddings/providers/__init__.py +++ b/nemoguardrails/embeddings/providers/__init__.py @@ -18,7 +18,7 @@ from typing import Optional, Type -from . import fastembed, nim, openai, sentence_transformers, cohere +from . import cohere, fastembed, nim, openai, sentence_transformers from .base import EmbeddingModel from .registry import EmbeddingProviderRegistry diff --git a/nemoguardrails/embeddings/providers/cohere.py b/nemoguardrails/embeddings/providers/cohere.py index cf7f2c4d8..c6171daac 100644 --- a/nemoguardrails/embeddings/providers/cohere.py +++ b/nemoguardrails/embeddings/providers/cohere.py @@ -120,4 +120,6 @@ def encode(self, documents: List[str]) -> List[List[float]]: """ # Make embedding request to Cohere API - return self.client.embed(texts=documents, model=self.model, input_type=self.input_type).embeddings + return self.client.embed( + texts=documents, model=self.model, input_type=self.input_type + ).embeddings