From 1198ae660f360e398683e7f25bfab2ee8c7db3be Mon Sep 17 00:00:00 2001 From: Sathyanarayanan-ops <57038667+Sathyanarayanan-ops@users.noreply.github.com> Date: Fri, 26 Sep 2025 08:49:47 -0700 Subject: [PATCH 1/5] Fix typo issues in falkordb vector --- .../langchain_community/vectorstores/falkordb_vector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/vectorstores/falkordb_vector.py b/libs/community/langchain_community/vectorstores/falkordb_vector.py index c53975a94..3b9e1a40d 100644 --- a/libs/community/langchain_community/vectorstores/falkordb_vector.py +++ b/libs/community/langchain_community/vectorstores/falkordb_vector.py @@ -384,7 +384,7 @@ def __init__( DistanceStrategy.COSINE, ]: raise ValueError( - "`distance_strategy` must be either 'EULIDEAN_DISTANCE` or `COSINE`" + "`distance_strategy` must be either `EUCLIDEAN_DISTANCE` or `COSINE`" ) # Graph object takes precedent over env or input params From cc8805627fe9d2e3acaeb62303f9bf2c6b4b8948 Mon Sep 17 00:00:00 2001 From: Sathyanarayanan-ops <57038667+Sathyanarayanan-ops@users.noreply.github.com> Date: Sun, 28 Sep 2025 11:41:25 -0700 Subject: [PATCH 2/5] FIxed major bugs in falkordb distance similarity calculation - replaced hardcoded values, fixed validations --- .../vectorstores/falkordb_vector.py | 41 +++++++++++++++---- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/falkordb_vector.py b/libs/community/langchain_community/vectorstores/falkordb_vector.py index 3b9e1a40d..d3f70be2b 100644 --- a/libs/community/langchain_community/vectorstores/falkordb_vector.py +++ b/libs/community/langchain_community/vectorstores/falkordb_vector.py @@ -28,7 +28,7 @@ def generate_random_string(length: int) -> str: return random_string -DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.COSINE +DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.EUCLIDEAN_DISTANCE DISTANCE_MAPPING = { DistanceStrategy.EUCLIDEAN_DISTANCE: 
"euclidean", DistanceStrategy.COSINE: "cosine", @@ -492,6 +492,9 @@ def retrieve_existing_node_index( Check if the vector index exists in the FalkorDB database and returns its embedding dimension, entity_type, entity_label, entity_property + + This version also validates the similarity_function against the configured + distance_strategy, so we don't silently reuse an index with the wrong distance metric. This method; 1. queries the FalkorDB database for existing indexes @@ -539,9 +542,20 @@ def retrieve_existing_node_index( entity_type = str(dict["entity_type"]) entity_label = str(dict["entity_label"]) entity_property = str(dict["entity_property"]) + similarity_function = dict.get("index_similarityFunction") break if embedding_dimension and entity_type and entity_label and entity_property: self._index_type = IndexType(entity_type) + desired_sim = DISTANCE_MAPPING[self._distance_strategy] + if similarity_function and similarity_function != desired_sim: + raise ValueError( + f"Existing index on {entity_label}.{entity_property} " + f"uses similarity_function='{similarity_function}', " + f"but requested distance_strategy is '{self._distance_strategy}' " + f"({desired_sim}). " + "Drop/recreate the index or change the distance_strategy." 
+ ) + return embedding_dimension, entity_type, entity_label, entity_property else: return None, None, None, None @@ -729,7 +743,7 @@ def create_new_index_on_relationship( relation_type, embedding_node_property, dim=embedding_dimension, - similarity_function=DISTANCE_MAPPING[DEFAULT_DISTANCE_STRATEGY], + similarity_function=DISTANCE_MAPPING[self._distance_strategy], ) except Exception as e: if "already indexed" in str(e): @@ -949,6 +963,7 @@ def __from( metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, search_type: SearchType = SearchType.VECTOR, + distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, **kwargs: Any, ) -> FalkorDBVector: if ids is None: @@ -960,8 +975,10 @@ def __from( store = cls( embedding=embedding, search_type=search_type, + distance_strategy=distance_strategy, **kwargs, - ) + ) + # Check if the vector index already exists embedding_dimension, index_type, entity_label, entity_property = ( @@ -1139,6 +1156,7 @@ def from_existing_graph( *, search_type: SearchType = DEFAULT_SEARCH_TYPE, retrieval_query: str = "", + distance_strategy = DEFAULT_DISTANCE_STRATEGY, **kwargs: Any, ) -> FalkorDBVector: """ @@ -1198,6 +1216,7 @@ def from_existing_graph( retrieval_query=retrieval_query, node_label=node_label, embedding_node_property=embedding_node_property, + distance_strategy=distance_strategy, **kwargs, ) @@ -1444,15 +1463,19 @@ def similarity_search_with_score_by_vector( f"n.{self.embedding_node_property} IS NOT NULL AND " ) - base_cosine_query = ( + if self._distance_strategy == DistanceStrategy.COSINE: + base_distance_query = ( " WITH n as node, " - f" vec.cosineDistance(n.{self.embedding_node_property}" - ", vecf32($embedding)) as score " - ) - + f" vec.cosineDistance(n.{self.embedding_node_property}, vecf32($embedding)) as score " + ) + else: + base_distance_query = ( + " WITH n as node, " + f" vec.euclideanDistance(n.{self.embedding_node_property}, vecf32($embedding)) as score " + ) filter_snippets, filter_params = 
construct_metadata_filter(filter) - index_query = base_index_query + filter_snippets + base_cosine_query + index_query = base_index_query + filter_snippets + base_distance_query else: index_query = _get_search_index_query(self.search_type, self._index_type) filter_params = {} From d499bd0c1301c7b17135fbe7d3dc7f553617fb65 Mon Sep 17 00:00:00 2001 From: Sathyanarayanan-ops <57038667+Sathyanarayanan-ops@users.noreply.github.com> Date: Sun, 28 Sep 2025 17:53:23 -0700 Subject: [PATCH 3/5] Unittests for falkor db distance calculation fix --- .../test_falkordb_distance_strategy.py | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py diff --git a/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py b/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py new file mode 100644 index 000000000..29340c962 --- /dev/null +++ b/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py @@ -0,0 +1,148 @@ +""" +Unit tests for FalkorDBVector distance strategy handling. + +These tests verify that the FalkorDB LangChain wrapper respects the +configured distance strategy when creating relationship indexes, +propagates a custom distance strategy when instantiating from +documents, and builds the correct distance function into the metadata +filter search query. + +The tests use unittest.mock to avoid requiring a live FalkorDB +instance. They focus on the behaviour of the wrapper itself. +""" + +from typing import Any, List +from unittest.mock import MagicMock + +import pytest +from langchain_core.documents import Document + +from langchain_community.vectorstores.falkordb_vector import ( + FalkorDBVector, + IndexType, +) +from langchain_community.vectorstores.utils import DistanceStrategy + + +class DummyEmbeddings: + """A minimal embeddings implementation for testing. 
+ + This class implements the methods expected by FalkorDBVector + but returns trivial fixed‑size vectors so that tests can run + without access to external embedding models. + """ + + def __init__(self, size: int = 2) -> None: + self.size = size + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + # Return a distinct vector for each document; dimension is ``self.size``. + return [[float(i + 1) for _ in range(self.size)] for i in range(len(texts))] + + def embed_query(self, text: str) -> List[float]: + # Return a simple vector of the correct dimension for any query. + return [1.0 for _ in range(self.size)] + + +def test_create_new_index_on_relationship_respects_strategy() -> None: + """Ensure that create_new_index_on_relationship uses the configured metric.""" + # Mock graph and database; create_edge_vector_index should record its kwargs. + fake_db = MagicMock() + fake_graph = MagicMock() + fake_graph._graph = fake_db + fake_graph._driver = MagicMock() + + # Instantiate a FalkorDBVector with cosine distance + store = FalkorDBVector( + embedding=DummyEmbeddings(), + graph=fake_graph, + relation_type="REL", + embedding_node_property="embedding", + embedding_dimension=2, + distance_strategy=DistanceStrategy.COSINE, + ) + + store.create_new_index_on_relationship() + # Verify that the underlying DB method was called with similarity_function="cosine" + assert fake_db.create_edge_vector_index.call_count == 1 + _, kwargs = fake_db.create_edge_vector_index.call_args + assert kwargs["similarity_function"] == "cosine" + + +def test_from_documents_propagates_distance_strategy() -> None: + """Ensure that from_documents forwards distance_strategy to the store.""" + fake_db = MagicMock() + fake_graph = MagicMock() + fake_graph._graph = fake_db + fake_graph._driver = MagicMock() + + docs = [Document(page_content="alpha"), Document(page_content="beta")] + store = FalkorDBVector.from_documents( + documents=docs, + embedding=DummyEmbeddings(), + graph=fake_graph, + 
embedding_dimension=2, + node_label="Test", + distance_strategy=DistanceStrategy.COSINE, + ) + + assert store._distance_strategy == DistanceStrategy.COSINE + + +def test_similarity_search_with_score_by_vector_uses_correct_distance() -> None: + """Ensure metadata-filtered vector search uses the correct distance function.""" + # Prepare a store with cosine distance + fake_db = MagicMock() + fake_graph = MagicMock() + fake_graph._graph = fake_db + fake_graph._driver = MagicMock() + + store = FalkorDBVector( + embedding=DummyEmbeddings(), + graph=fake_graph, + node_label="Chunk", + embedding_node_property="embedding", + embedding_dimension=2, + distance_strategy=DistanceStrategy.COSINE, + ) + # Manually set index type for query construction + store._index_type = IndexType.NODE + + captured: dict[str, Any] = {} + + def fake_query(query: str, params: Any = None) -> List[Any]: + captured["query"] = query + return [] + + # Patch the _query method to capture the query string + store._query = fake_query # type: ignore[assignment] + + # Perform a similarity search with a metadata filter; query should contain cosine distance + store.similarity_search_with_score_by_vector( + embedding=[0.1, 0.2], k=1, filter={"lang": "en"} + ) + assert "vec.cosineDistance" in captured["query"] + + # Repeat for Euclidean strategy + store2 = FalkorDBVector( + embedding=DummyEmbeddings(), + graph=fake_graph, + node_label="Chunk", + embedding_node_property="embedding", + embedding_dimension=2, + distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE, + ) + store2._index_type = IndexType.NODE + + captured2: dict[str, Any] = {} + + def fake_query2(query: str, params: Any = None) -> List[Any]: + captured2["query"] = query + return [] + + store2._query = fake_query2 # type: ignore[assignment] + + store2.similarity_search_with_score_by_vector( + embedding=[0.3, 0.4], k=1, filter={"lang": "en"} + ) + assert "vec.euclideanDistance" in captured2["query"] From f66195ffbc01281ad835db8c4ccfa2d23b2b3eec Mon Sep 
17 00:00:00 2001 From: Sathyanarayanan-ops <57038667+Sathyanarayanan-ops@users.noreply.github.com> Date: Tue, 4 Nov 2025 21:23:51 -0800 Subject: [PATCH 4/5] fix: unit test fail and Ruff line exceed error --- .../vectorstores/falkordb_vector.py | 29 +++++++++++-------- .../test_falkordb_distance_strategy.py | 3 ++ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/falkordb_vector.py b/libs/community/langchain_community/vectorstores/falkordb_vector.py index 5458e239c..0ffab33d9 100644 --- a/libs/community/langchain_community/vectorstores/falkordb_vector.py +++ b/libs/community/langchain_community/vectorstores/falkordb_vector.py @@ -492,9 +492,10 @@ def retrieve_existing_node_index( Check if the vector index exists in the FalkorDB database and returns its embedding dimension, entity_type, entity_label, entity_property - - This version also validates the similarity_function against the configured - distance_strategy, so we don't silently reuse an index with the wrong distance metric. + + This version also validates the similarity_function against + the configured distance_strategy, so we don't silently + reuse an index with the wrong distance metric. This method; 1. queries the FalkorDB database for existing indexes @@ -550,10 +551,13 @@ def retrieve_existing_node_index( if similarity_function and similarity_function != desired_sim: raise ValueError( f"Existing index on {entity_label}.{entity_property} " - f"uses similarity_function='{similarity_function}', " - f"but requested distance_strategy is '{self._distance_strategy}' " + f"uses " + f"similarity_function='{similarity_function}', " + f"but requested distance_strategy is " + f"'{self._distance_strategy}' " f"({desired_sim}). " - "Drop/recreate the index or change the distance_strategy." + "Drop/recreate the index or " + "change the distance_strategy." 
) return embedding_dimension, entity_type, entity_label, entity_property @@ -977,8 +981,7 @@ def __from( search_type=search_type, distance_strategy=distance_strategy, **kwargs, - ) - + ) # Check if the vector index already exists embedding_dimension, index_type, entity_label, entity_property = ( @@ -1156,7 +1159,7 @@ def from_existing_graph( *, search_type: SearchType = DEFAULT_SEARCH_TYPE, retrieval_query: str = "", - distance_strategy = DEFAULT_DISTANCE_STRATEGY, + distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, **kwargs: Any, ) -> FalkorDBVector: """ @@ -1465,13 +1468,15 @@ def similarity_search_with_score_by_vector( if self._distance_strategy == DistanceStrategy.COSINE: base_distance_query = ( - " WITH n as node, " - f" vec.cosineDistance(n.{self.embedding_node_property}, vecf32($embedding)) as score " + " WITH n as node, " + f" vec.cosineDistance(n.{self.embedding_node_property}," + f" vecf32($embedding)) as score " ) else: base_distance_query = ( " WITH n as node, " - f" vec.euclideanDistance(n.{self.embedding_node_property}, vecf32($embedding)) as score " + f" vec.euclideanDistance(n.{self.embedding_node_property}," + f"vecf32($embedding)) as score " ) filter_snippets, filter_params = construct_metadata_filter(filter) diff --git a/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py b/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py index 29340c962..68388032d 100644 --- a/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py +++ b/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py @@ -10,6 +10,9 @@ The tests use unittest.mock to avoid requiring a live FalkorDB instance. They focus on the behaviour of the wrapper itself. 
""" +import pytest + +pytest.importorskip("falkordb") from typing import Any, List from unittest.mock import MagicMock From 06d5822a7ff5400977d7d0380791584c1d26bc4d Mon Sep 17 00:00:00 2001 From: Sathyanarayanan-ops <57038667+Sathyanarayanan-ops@users.noreply.github.com> Date: Tue, 4 Nov 2025 21:29:49 -0800 Subject: [PATCH 5/5] fix: Unit test mypy error fix FalkorDB --- .../vectorstores/test_falkordb_distance_strategy.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py b/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py index 68388032d..165bfe0cb 100644 --- a/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py +++ b/libs/community/tests/unit_tests/vectorstores/test_falkordb_distance_strategy.py @@ -10,6 +10,7 @@ The tests use unittest.mock to avoid requiring a live FalkorDB instance. They focus on the behaviour of the wrapper itself. """ + import pytest pytest.importorskip("falkordb") @@ -19,6 +20,7 @@ import pytest from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings from langchain_community.vectorstores.falkordb_vector import ( FalkorDBVector, @@ -27,11 +29,11 @@ from langchain_community.vectorstores.utils import DistanceStrategy -class DummyEmbeddings: +class DummyEmbeddings(Embeddings): """A minimal embeddings implementation for testing. This class implements the methods expected by FalkorDBVector - but returns trivial fixed‑size vectors so that tests can run + but returns trivial fixed-size vectors so that tests can run without access to external embedding models. 
""" @@ -120,7 +122,8 @@ def fake_query(query: str, params: Any = None) -> List[Any]: # Patch the _query method to capture the query string store._query = fake_query # type: ignore[assignment] - # Perform a similarity search with a metadata filter; query should contain cosine distance + # Perform a similarity search with a metadata filter; + # query should contain cosine distance store.similarity_search_with_score_by_vector( embedding=[0.1, 0.2], k=1, filter={"lang": "en"} )