From 3fa3a4d24c199597b0fb34c5cf35c9508ba583d2 Mon Sep 17 00:00:00 2001
From: kimbwook <bwook00@naver.com>
Date: Thu, 24 Jul 2025 15:56:08 +0900
Subject: [PATCH 1/7] add cohere embedding provider

---
 nemoguardrails/embeddings/providers/cohere.py | 118 ++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 nemoguardrails/embeddings/providers/cohere.py

diff --git a/nemoguardrails/embeddings/providers/cohere.py b/nemoguardrails/embeddings/providers/cohere.py
new file mode 100644
index 000000000..afda11aaf
--- /dev/null
+++ b/nemoguardrails/embeddings/providers/cohere.py
@@ -0,0 +1,118 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import asyncio
+from contextvars import ContextVar
+from typing import List
+
+from .base import EmbeddingModel
+
+# The Cohere async client is stored in a context variable so that it is scoped to the
+# current asyncio loop: the client caches the loop, and reusing it on another loop fails.
+# NOTE: in this module only the commented-out code in `encode_async` uses this variable.
+async_client_var: ContextVar = ContextVar("async_client", default=None)
+
+
+class CohereEmbeddingModel(EmbeddingModel):
+    """Embedding model using Cohere API.
+
+    Args:
+        embedding_model (str): The name of the embedding model.
+        input_type (str): The Cohere input type; defaults to "search_document". One of
+            "search_document", "search_query", "classification", "clustering", "image".
+
+    Attributes:
+        model (str): The name of the embedding model.
+        embedding_size (int): The size of the embeddings.
+
+    Methods:
+        encode: Encode a list of documents into embeddings.
+    """
+
+    engine_name = "cohere"
+
+    def __init__(
+        self,
+        embedding_model: str,
+        input_type: str = "search_document",
+        **kwargs,
+    ):
+        """Create the Cohere client from ``kwargs`` and resolve the embedding size."""
+        try:
+            import cohere
+        except ImportError as err:
+            raise ImportError(
+                "Could not import cohere, please install it with "
+                "`pip install cohere`."
+            ) from err
+
+        self.model = embedding_model
+        self.input_type = input_type
+        self.client = cohere.Client(**kwargs)
+
+        self.embedding_size_dict = {
+            "embed-v4.0": 1536,
+            "embed-english-v3.0": 1024,
+            "embed-english-light-v3.0": 384,
+            "embed-multilingual-v3.0": 1024,
+            "embed-multilingual-light-v3.0": 384,
+        }
+
+        if self.model in self.embedding_size_dict:
+            self.embedding_size = self.embedding_size_dict[self.model]
+        else:
+            # Perform a first encoding to get the embedding size
+            self.embedding_size = len(self.encode(["test"])[0])
+
+    async def encode_async(self, documents: List[str]) -> List[List[float]]:
+        """Encode a list of documents into embeddings.
+
+        Args:
+            documents (List[str]): The list of documents to be encoded.
+
+        Returns:
+            List[List[float]]: The encoded embeddings.
+
+        """
+        loop = asyncio.get_running_loop()
+        embeddings = await loop.run_in_executor(None, self.encode, documents)
+
+        # NOTE: The async implementation below has some edge cases because of
+        # httpx and async and returns "Event loop is closed." errors. Falling back to
+        # a thread-based implementation for now.
+
+        # # We do lazy initialization of the async client to make sure it's on the correct loop
+        # async_client = async_client_var.get()
+        # if async_client is None:
+        #     async_client = AsyncClient()
+        #     async_client_var.set(async_client)
+        #
+        # # Make embedding request to Cohere API
+        # embeddings = (await async_client.embed(texts=documents, model=self.model, input_type=self.input_type)).embeddings
+
+        return embeddings
+
+    def encode(self, documents: List[str]) -> List[List[float]]:
+        """Encode a list of documents into embeddings.
+
+        Args:
+            documents (List[str]): The list of documents to be encoded.
+
+        Returns:
+            List[List[float]]: The encoded embeddings.
+
+        """
+
+        # Make embedding request to Cohere API
+        return self.client.embed(texts=documents, model=self.model, input_type=self.input_type).embeddings

From 3063df5d97e545c2de805bcd2fa3fed3cca66258 Mon Sep 17 00:00:00 2001
From: kimbwook <bwook00@naver.com>
Date: Thu, 24 Jul 2025 16:00:48 +0900
Subject: [PATCH 2/7] add annotation about setting api_key

---
 nemoguardrails/embeddings/providers/cohere.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/nemoguardrails/embeddings/providers/cohere.py b/nemoguardrails/embeddings/providers/cohere.py
index afda11aaf..cf7f2c4d8 100644
--- a/nemoguardrails/embeddings/providers/cohere.py
+++ b/nemoguardrails/embeddings/providers/cohere.py
@@ -25,7 +25,12 @@
 
 
 class CohereEmbeddingModel(EmbeddingModel):
-    """Embedding model using Cohere API.
+    """
+    Embedding model using Cohere API.
+
+    To use, you must have either:
+        1. The ``COHERE_API_KEY`` environment variable set with your API key, or
+        2. Pass ``api_key=...`` as a keyword argument; it is forwarded to ``cohere.Client``.
 
     Args:
         embedding_model (str): The name of the embedding model.

From dd62471b535e915e3d57ce683061ca14cedeb9b7 Mon Sep 17 00:00:00 2001
From: kimbwook <bwook00@naver.com>
Date: Thu, 24 Jul 2025 16:02:18 +0900
Subject: [PATCH 3/7] add configuration-guide.md

---
 docs/user-guides/configuration-guide.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/user-guides/configuration-guide.md b/docs/user-guides/configuration-guide.md
index f9947cd18..b7fd97e19 100644
--- a/docs/user-guides/configuration-guide.md
+++ b/docs/user-guides/configuration-guide.md
@@ -538,6 +538,7 @@ The following tables lists the supported embedding providers:
 | OpenAI               | `openai`               | `text-embedding-ada-002`, etc.     |
 | SentenceTransformers | `SentenceTransformers` | `all-MiniLM-L6-v2`, etc.           |
 | NVIDIA AI Endpoints  | `nvidia_ai_endpoints`  | `nv-embed-v1`, etc.                |
+| Cohere               | `cohere`               | `embed-multilingual-v3.0`, etc.    |
 
 ```{note}
 You can use any of the supported models for any of the supported embedding providers.

From b1539a48af15c6a214bad1a041f1f37b4e15d946 Mon Sep 17 00:00:00 2001
From: kimbwook <bwook00@naver.com>
Date: Thu, 24 Jul 2025 16:03:26 +0900
Subject: [PATCH 4/7] add cohere at provider's init

---
 nemoguardrails/embeddings/providers/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nemoguardrails/embeddings/providers/__init__.py b/nemoguardrails/embeddings/providers/__init__.py
index c9a8f2896..11ce3471e 100644
--- a/nemoguardrails/embeddings/providers/__init__.py
+++ b/nemoguardrails/embeddings/providers/__init__.py
@@ -18,7 +18,7 @@
 
 from typing import Optional, Type
 
-from . import fastembed, nim, openai, sentence_transformers
+from . import fastembed, nim, openai, sentence_transformers, cohere
 from .base import EmbeddingModel
 from .registry import EmbeddingProviderRegistry
 
@@ -68,6 +68,7 @@ def register_embedding_provider(
 register_embedding_provider(sentence_transformers.SentenceTransformerEmbeddingModel)
 register_embedding_provider(nim.NIMEmbeddingModel)
 register_embedding_provider(nim.NVIDIAAIEndpointsEmbeddingModel)
+register_embedding_provider(cohere.CohereEmbeddingModel)
 
 
 def init_embedding_model(

From 825f916d1aeeb3cab90b24527a9ab0f392480ca4 Mon Sep 17 00:00:00 2001
From: kimbwook <bwook00@naver.com>
Date: Thu, 24 Jul 2025 16:07:19 +0900
Subject: [PATCH 5/7] add test code

---
 .../with_cohere_embeddings/config.co          | 12 +++
 .../with_cohere_embeddings/config.yml         |  8 ++
 tests/test_embeddings_cohere.py               | 97 +++++++++++++++++++
 3 files changed, 117 insertions(+)
 create mode 100644 tests/test_configs/with_cohere_embeddings/config.co
 create mode 100644 tests/test_configs/with_cohere_embeddings/config.yml
 create mode 100644 tests/test_embeddings_cohere.py

diff --git a/tests/test_configs/with_cohere_embeddings/config.co b/tests/test_configs/with_cohere_embeddings/config.co
new file mode 100644
index 000000000..56035e40c
--- /dev/null
+++ b/tests/test_configs/with_cohere_embeddings/config.co
@@ -0,0 +1,12 @@
+define user ask capabilities
+  "What can you do?"
+  "What can you help me with?"
+  "tell me what you can do"
+  "tell me about you"
+
+define bot inform capabilities
+  "I am an AI assistant that helps answer questions."
+
+define flow
+  user ask capabilities
+  bot inform capabilities
diff --git a/tests/test_configs/with_cohere_embeddings/config.yml b/tests/test_configs/with_cohere_embeddings/config.yml
new file mode 100644
index 000000000..71627761c
--- /dev/null
+++ b/tests/test_configs/with_cohere_embeddings/config.yml
@@ -0,0 +1,8 @@
+models:
+  - type: main
+    engine: openai
+    model: gpt-3.5-turbo-instruct
+
+  - type: embeddings
+    engine: cohere
+    model: embed-multilingual-v3.0
diff --git a/tests/test_embeddings_cohere.py b/tests/test_embeddings_cohere.py
new file mode 100644
index 000000000..591c6d5b6
--- /dev/null
+++ b/tests/test_embeddings_cohere.py
@@ -0,0 +1,97 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import pytest
+
+from nemoguardrails import LLMRails, RailsConfig
+
+try:
+    from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel
+except ImportError:
+    # Ignore this if running in test environment when cohere not installed.
+    CohereEmbeddingModel = None
+
+CONFIGS_FOLDER = os.path.join(os.path.dirname(__file__), ".", "test_configs")
+
+LIVE_TEST_MODE = os.environ.get("LIVE_TEST")
+
+
+@pytest.fixture
+def app():
+    """Load the configuration where we replace FastEmbed with Cohere."""
+    config = RailsConfig.from_path(
+        os.path.join(CONFIGS_FOLDER, "with_cohere_embeddings")
+    )
+
+    return LLMRails(config)
+
+
+@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.")
+def test_custom_llm_registration(app):
+    assert isinstance(
+        app.llm_generation_actions.flows_index._model, CohereEmbeddingModel
+    )
+
+
+@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.")
+@pytest.mark.asyncio
+async def test_live_query():
+    config = RailsConfig.from_path(
+        os.path.join(CONFIGS_FOLDER, "with_cohere_embeddings")
+    )
+    app = LLMRails(config)
+
+    result = await app.generate_async(
+        messages=[{"role": "user", "content": "tell me what you can do"}]
+    )
+
+    assert result == {
+        "role": "assistant",
+        "content": "I am an AI assistant that helps answer questions.",
+    }
+
+
+@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.")
+def test_live_query_sync(app):
+    """Same query as ``test_live_query``, through the blocking ``generate`` API."""
+    result = app.generate(
+        messages=[{"role": "user", "content": "tell me what you can do"}]
+    )
+
+    assert result == {
+        "role": "assistant",
+        "content": "I am an AI assistant that helps answer questions.",
+    }
+
+
+@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.")
+def test_sync_embeddings():
+    model = CohereEmbeddingModel("embed-multilingual-v3.0")
+
+    result = model.encode(["test"])
+
+    assert len(result[0]) == 1024
+
+
+@pytest.mark.skipif(not LIVE_TEST_MODE, reason="Not in live mode.")
+@pytest.mark.asyncio
+async def test_async_embeddings():
+    model = CohereEmbeddingModel("embed-multilingual-v3.0")
+
+    result = await model.encode_async(["test"])
+
+    assert len(result[0]) == 1024
\ No newline at end of file

From cae3b914171bedbeeedcd98cd8e8506ea4418cb0 Mon Sep 17 00:00:00 2001
From: kimbwook <bwook00@naver.com>
Date: Thu, 24 Jul 2025 16:11:10 +0900
Subject: [PATCH 6/7] add blank line

---
 tests/test_embeddings_cohere.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_embeddings_cohere.py b/tests/test_embeddings_cohere.py
index 591c6d5b6..04b351d6b 100644
--- a/tests/test_embeddings_cohere.py
+++ b/tests/test_embeddings_cohere.py
@@ -94,4 +94,4 @@ async def test_async_embeddings():
 
     result = await model.encode_async(["test"])
 
-    assert len(result[0]) == 1024
\ No newline at end of file
+    assert len(result[0]) == 1024

From 4605ecacc895c2e9c30f24c793d4f240f108afe1 Mon Sep 17 00:00:00 2001
From: kimbwook <bwook00@naver.com>
Date: Wed, 30 Jul 2025 15:31:46 +0900
Subject: [PATCH 7/7] run pre-commit

---
 nemoguardrails/embeddings/providers/__init__.py | 2 +-
 nemoguardrails/embeddings/providers/cohere.py   | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/nemoguardrails/embeddings/providers/__init__.py b/nemoguardrails/embeddings/providers/__init__.py
index 11ce3471e..d8f3d6f16 100644
--- a/nemoguardrails/embeddings/providers/__init__.py
+++ b/nemoguardrails/embeddings/providers/__init__.py
@@ -18,7 +18,7 @@
 
 from typing import Optional, Type
 
-from . import fastembed, nim, openai, sentence_transformers, cohere
+from . import cohere, fastembed, nim, openai, sentence_transformers
 from .base import EmbeddingModel
 from .registry import EmbeddingProviderRegistry
 
diff --git a/nemoguardrails/embeddings/providers/cohere.py b/nemoguardrails/embeddings/providers/cohere.py
index cf7f2c4d8..c6171daac 100644
--- a/nemoguardrails/embeddings/providers/cohere.py
+++ b/nemoguardrails/embeddings/providers/cohere.py
@@ -120,4 +120,6 @@ def encode(self, documents: List[str]) -> List[List[float]]:
         """
 
         # Make embedding request to Cohere API
-        return self.client.embed(texts=documents, model=self.model, input_type=self.input_type).embeddings
+        return self.client.embed(
+            texts=documents, model=self.model, input_type=self.input_type
+        ).embeddings