langchain[patch]: fix-cohere-reranker-rerank-method with cohere v5 (langchain-ai#19486)

jjovalle99 · baskaryan · web-flow · commit 51baa1b5cf22 · 2024-03-28T06:32:03.000Z
#### Description
Fixed the following error raised by the `rerank` method of `CohereRerank`:
```
---> [79](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/jjmov99/legal-colombia/~/legal-colombia/.venv/lib/python3.11/site-packages/langchain/retrievers/document_compressors/cohere_rerank.py:79) results = self.client.rerank(
     [80](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/jjmov99/legal-colombia/~/legal-colombia/.venv/lib/python3.11/site-packages/langchain/retrievers/document_compressors/cohere_rerank.py:80)     query, docs, model, top_n=top_n, max_chunks_per_doc=max_chunks_per_doc
     [81](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/jjmov99/legal-colombia/~/legal-colombia/.venv/lib/python3.11/site-packages/langchain/retrievers/document_compressors/cohere_rerank.py:81) )
     [82](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/jjmov99/legal-colombia/~/legal-colombia/.venv/lib/python3.11/site-packages/langchain/retrievers/document_compressors/cohere_rerank.py:82) result_dicts = []
     [83](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/jjmov99/legal-colombia/~/legal-colombia/.venv/lib/python3.11/site-packages/langchain/retrievers/document_compressors/cohere_rerank.py:83) for res in results.results:

TypeError: BaseCohere.rerank() takes 1 positional argument but 4 positional arguments (and 2 keyword-only arguments) were given
```
This was easily fixed by going from this:
```
   def rerank(
        self,
        documents: Sequence[Union[str, Document, dict]],
        query: str,
        *,
        model: Optional[str] = None,
        top_n: Optional[int] = -1,
        max_chunks_per_doc: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
         ...
        if len(documents) == 0:  # to avoid empty api call
            return []
        docs = [
            doc.page_content if isinstance(doc, Document) else doc for doc in documents
        ]
        model = model or self.model
        top_n = top_n if (top_n is None or top_n > 0) else self.top_n
        results = self.client.rerank(
            query, docs, model, top_n=top_n, max_chunks_per_doc=max_chunks_per_doc
        )
        result_dicts = []
        for res in results:
            result_dicts.append(
                {"index": res.index, "relevance_score": res.relevance_score}
            )
        return result_dicts
```
to this:
```
    def rerank(
        self,
        documents: Sequence[Union[str, Document, dict]],
        query: str,
        *,
        model: Optional[str] = None,
        top_n: Optional[int] = -1,
        max_chunks_per_doc: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
         ...
        if len(documents) == 0:  # to avoid empty api call
            return []
        docs = [
            doc.page_content if isinstance(doc, Document) else doc for doc in documents
        ]
        model = model or self.model
        top_n = top_n if (top_n is None or top_n > 0) else self.top_n
        results = self.client.rerank(
            query=query, documents=docs, model=model, top_n=top_n, max_chunks_per_doc=max_chunks_per_doc <-------------
        )
        result_dicts = []
        for res in results.results:  <-------------
            result_dicts.append(
                {"index": res.index, "relevance_score": res.relevance_score}
            )
        return result_dicts
```
#### Unit & Integration tests
I added a unit test to check the behaviour of `rerank`. I also fixed the
original integration test, which was failing.

#### Format & Linting
Everything worked properly with `make lint_diff`, `make format_diff` and
`make format`. However, I noticed an error coming from another part of the
library when running `make lint`:

```
(langchain-py3.9) ➜  langchain git:(master) make format
[ "." = "" ] || poetry run ruff format .
1636 files left unchanged
[ "." = "" ] || poetry run ruff --select I --fix .
(langchain-py3.9) ➜  langchain git:(master) make lint
./scripts/check_pydantic.sh .
./scripts/lint_imports.sh
poetry run ruff .
[ "." = "" ] || poetry run ruff format . --diff
1636 files already formatted
[ "." = "" ] || poetry run ruff --select I .
[ "." = "" ] || mkdir -p .mypy_cache && poetry run mypy . --cache-dir .mypy_cache
langchain/agents/openai_assistant/base.py:252: error: Argument "file_ids" to "create" of "Assistants" has incompatible type "Optional[Any]"; expected "Union[list[str], NotGiven]"  [arg-type]
langchain/agents/openai_assistant/base.py:374: error: Argument "file_ids" to "create" of "AsyncAssistants" has incompatible type "Optional[Any]"; expected "Union[list[str], NotGiven]"  [arg-type]
Found 2 errors in 1 file (checked 1634 source files)
make: *** [Makefile:65: lint] Error 1
```

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
diff --git a/libs/langchain/langchain/retrievers/document_compressors/cohere_rerank.py b/libs/langchain/langchain/retrievers/document_compressors/cohere_rerank.py
@@ -81,8 +81,14 @@ def rerank(
         model = model or self.model
         top_n = top_n if (top_n is None or top_n > 0) else self.top_n
         results = self.client.rerank(
-            query, docs, model, top_n=top_n, max_chunks_per_doc=max_chunks_per_doc
+            query=query,
+            documents=docs,
+            model=model,
+            top_n=top_n,
+            max_chunks_per_doc=max_chunks_per_doc,
         )
+        if hasattr(results, "results"):
+            results = getattr(results, "results")
         result_dicts = []
         for res in results:
             result_dicts.append(
diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock
diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml
@@ -36,7 +36,7 @@ jinja2 = {version = "^3", optional = true}
 tiktoken = {version = ">=0.3.2,<0.6.0", optional = true, python=">=3.9"}
 qdrant-client = {version = "^1.3.1", optional = true, python = ">=3.8.1,<3.12"}
 dataclasses-json = ">= 0.5.7, < 0.7"
-cohere = {version = "^4", optional = true}
+cohere = {version = ">=4,<6", optional = true}
 openai = {version = "<2", optional = true}
 nlpcloud = {version = "^1", optional = true}
 huggingface_hub = {version = "^0", optional = true}
diff --git a/libs/langchain/tests/unit_tests/retrievers/document_compressors/test_cohere_rerank.py b/libs/langchain/tests/unit_tests/retrievers/document_compressors/test_cohere_rerank.py
@@ -1,8 +1,10 @@
 import os
 
 import pytest
+from pytest_mock import MockerFixture
 
 from langchain.retrievers.document_compressors import CohereRerank
+from langchain.schema import Document
 
 os.environ["COHERE_API_KEY"] = "foo"
 
@@ -14,3 +16,37 @@ def test_init() -> None:
     CohereRerank(
         top_n=5, model="rerank-english_v2.0", cohere_api_key="foo", user_agent="bar"
     )
+
+
+@pytest.mark.requires("cohere")
+def test_rerank(mocker: MockerFixture) -> None:
+    mock_client = mocker.MagicMock()
+    mock_result = mocker.MagicMock()
+    mock_result.results = [
+        mocker.MagicMock(index=0, relevance_score=0.8),
+        mocker.MagicMock(index=1, relevance_score=0.6),
+    ]
+    mock_client.rerank.return_value = mock_result
+
+    test_documents = [
+        Document(page_content="This is a test document."),
+        Document(page_content="Another test document."),
+    ]
+    test_query = "Test query"
+
+    mocker.patch("cohere.Client", return_value=mock_client)
+
+    reranker = CohereRerank(cohere_api_key="foo")
+    results = reranker.rerank(test_documents, test_query)
+
+    mock_client.rerank.assert_called_once_with(
+        query=test_query,
+        documents=[doc.page_content for doc in test_documents],
+        model="rerank-english-v2.0",
+        top_n=3,
+        max_chunks_per_doc=None,
+    )
+    assert results == [
+        {"index": 0, "relevance_score": 0.8},
+        {"index": 1, "relevance_score": 0.6},
+    ]