Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
### Added

- Added an optional `node_label_neo4j` parameter in the external retrievers to speed up the search query in Neo4j.

- Added an optional `id_property_getter` callable parameter in the Qdrant retriever to allow for custom ID retrieval.

## 1.10.1

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def get_search_results(
)

result_tuples = [
[f"{o[self.id_property_external]}", o["score"] or 0.0]
(f"{o[self.id_property_external]}", o["score"] or 0.0)
for o in response["matches"]
]

Expand Down
16 changes: 12 additions & 4 deletions src/neo4j_graphrag/retrievers/external/qdrant/qdrant.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import neo4j
from pydantic import ValidationError
from qdrant_client import QdrantClient
from qdrant_client.conversions.common_types import ScoredPoint

from neo4j_graphrag.embeddings.base import Embedder
from neo4j_graphrag.exceptions import (
Expand Down Expand Up @@ -80,6 +81,7 @@ class QdrantNeo4jRetriever(ExternalRetriever):
result_formatter (Optional[Callable[[neo4j.Record], RetrieverResultItem]]): Function to transform a neo4j.Record to a RetrieverResultItem.
neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to the server's default database ("neo4j" by default) (`see reference to documentation <https://neo4j.com/docs/operations-manual/current/database-administration/#manage-databases-default>`_).
node_label_neo4j (Optional[str]): The label of the Neo4j node to retrieve. This label must be properly escaped if needed, eg "`Label with spaces`".
id_property_getter (Optional[Callable[[ScoredPoint], str]]): Function to get the id property from a ScoredPoint. Defaults to point.payload.get(id_property_external, point.id).

Raises:
RetrieverInitializationError: If validation of the input arguments fail.
Expand All @@ -101,6 +103,7 @@ def __init__(
] = None,
neo4j_database: Optional[str] = None,
node_label_neo4j: Optional[str] = None,
id_property_getter: Optional[Callable[[ScoredPoint], Any]] = None,
):
try:
driver_model = Neo4jDriverModel(driver=driver)
Expand Down Expand Up @@ -142,6 +145,14 @@ def __init__(
self.return_properties = validated_data.return_properties
self.retrieval_query = validated_data.retrieval_query
self.result_formatter = validated_data.result_formatter
self.id_property_getter = id_property_getter

def get_match_id_from_point(self, point: ScoredPoint) -> Any:
if self.id_property_getter:
return self.id_property_getter(point)
if point.payload is None:
raise ValueError(f"Payload is None for point {point}")
return point.payload.get(self.id_property_external, point.id)

def get_search_results(
self,
Expand Down Expand Up @@ -220,10 +231,7 @@ def get_search_results(

result_tuples = []
for point in points:
assert point.payload is not None
result_tuples.append(
[point.payload.get(self.id_property_external, point.id), point.score]
)
result_tuples.append((self.get_match_id_from_point(point), point.score))

search_query = get_match_query(
return_properties=self.return_properties,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def get_search_results(
logger.debug("Response: %s", response)

result_tuples = [
[f"{o.properties[self.id_property_external]}", o.metadata.certainty or 0.0]
(f"{o.properties[self.id_property_external]}", o.metadata.certainty or 0.0)
for o in response.objects
]

Expand Down
6 changes: 3 additions & 3 deletions tests/unit/retrievers/external/test_pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def test_pinecone_retriever_search_happy_path(
driver.execute_query.assert_called_once_with(
search_query,
{
"match_params": [[f"node_{i}", i / top_k] for i in range(top_k)],
"match_params": [(f"node_{i}", i / top_k) for i in range(top_k)],
"id_property": "sync_id",
},
database_=None,
Expand Down Expand Up @@ -168,7 +168,7 @@ def test_pinecone_retriever_search_return_properties(
driver.execute_query.assert_called_once_with(
search_query,
{
"match_params": [[f"node_{i}", i / top_k] for i in range(top_k)],
"match_params": [(f"node_{i}", i / top_k) for i in range(top_k)],
"id_property": "sync_id",
},
database_=None,
Expand Down Expand Up @@ -228,7 +228,7 @@ def test_pinecone_retriever_search_retrieval_query(
driver.execute_query.assert_called_once_with(
search_query,
{
"match_params": [[f"node_{i}", i / top_k] for i in range(top_k)],
"match_params": [(f"node_{i}", i / top_k) for i in range(top_k)],
"id_property": "sync_id",
},
database_=None,
Expand Down
74 changes: 71 additions & 3 deletions tests/unit/retrievers/external/test_qdrant.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from unittest import mock
from unittest.mock import MagicMock

Expand Down Expand Up @@ -70,7 +71,7 @@ def test_qdrant_retriever_search_happy_path(
driver.execute_query.assert_called_once_with(
search_query,
{
"match_params": [[f"node_{i}", i / top_k] for i in range(top_k)],
"match_params": [(f"node_{i}", i / top_k) for i in range(top_k)],
"id_property": "sync_id",
},
database_=None,
Expand Down Expand Up @@ -149,7 +150,7 @@ def test_qdrant_retriever_search_return_properties(
driver.execute_query.assert_called_once_with(
search_query,
{
"match_params": [[f"node_{i}", i / top_k] for i in range(top_k)],
"match_params": [(f"node_{i}", i / top_k) for i in range(top_k)],
"id_property": "sync_id",
},
database_=None,
Expand Down Expand Up @@ -215,7 +216,7 @@ def test_qdrant_retriever_search_retrieval_query(
driver.execute_query.assert_called_once_with(
search_query,
{
"match_params": [[f"node_{i}", i / top_k] for i in range(top_k)],
"match_params": [(f"node_{i}", i / top_k) for i in range(top_k)],
"id_property": "sync_id",
},
database_=None,
Expand Down Expand Up @@ -267,3 +268,70 @@ def test_qdrant_retriever_invalid_retrieval_query(

assert "retrieval_query" in str(exc_info.value)
assert "Input should be a valid string" in str(exc_info.value)


def test_qdrant_retriever_search_custom_match_id_getter(
driver: MagicMock, client: MagicMock
) -> None:
def my_id_getter(point: ScoredPoint) -> Any:
if point.payload is None:
raise Exception("Payload is None")
return point.payload["data"]["id"]

retriever = QdrantNeo4jRetriever(
driver=driver,
client=client,
collection_name="dummy-text",
id_property_neo4j="sync_id",
id_property_getter=my_id_getter,
)
with mock.patch.object(retriever, "client") as mock_client:
top_k = 5
mock_client.query_points.return_value = QueryResponse(
points=[
ScoredPoint(
id=i,
version=0,
score=i / top_k,
payload={
"data": {"id": f"node_{i}"},
},
)
for i in range(top_k)
]
)
driver.execute_query.return_value = (
[
neo4j.Record({"node": {"sync_id": f"node_{i}"}, "score": i / top_k})
for i in range(top_k)
],
None,
None,
)
query_vector = [1.0 for _ in range(1536)]
search_query = get_match_query()
records = retriever.search(query_vector=query_vector)

driver.execute_query.assert_called_once_with(
search_query,
{
"match_params": [(f"node_{i}", i / top_k) for i in range(top_k)],
"id_property": "sync_id",
},
database_=None,
routing_=neo4j.RoutingControl.READ,
)

assert records == RetrieverResult(
items=[
RetrieverResultItem(
content="<Record node={'sync_id': "
+ f"'node_{i}'"
+ "} "
+ f"score={i / top_k}>",
metadata=None,
)
for i in range(top_k)
],
metadata={"__retriever": "QdrantNeo4jRetriever"},
)
6 changes: 3 additions & 3 deletions tests/unit/retrievers/external/test_weaviate.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def test_text_search_remote_vector_store_happy_path(driver: MagicMock) -> None:
search_query,
{
"match_params": [
[node_id_value, node_match_score],
(node_id_value, node_match_score),
],
"id_property": "sync_id",
},
Expand Down Expand Up @@ -142,7 +142,7 @@ def test_text_search_remote_vector_store_return_properties(driver: MagicMock) ->
search_query,
{
"match_params": [
[node_id_value, node_match_score],
(node_id_value, node_match_score),
],
"id_property": "sync_id",
},
Expand Down Expand Up @@ -190,7 +190,7 @@ def test_text_search_remote_vector_store_retrieval_query(driver: MagicMock) -> N
search_query,
{
"match_params": [
[node_id_value, node_match_score],
(node_id_value, node_match_score),
],
"id_property": "sync_id",
},
Expand Down