-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Cherry-pick docstores deserialization refactoring to 2.4.x (#8227)
* fix: deserialize Document Stores using specific `from_dict` class methods (#8207) * use from_dict * unused import * improve logic * improve reno * refactor: utility function for docstore deserialization (#8226) * refactor docstore deserialization * more tests * reno; headers * expose key
- Loading branch information
Showing
12 changed files
with
163 additions
and
61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]> | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from typing import Any, Dict | ||
|
||
from haystack import DeserializationError | ||
from haystack.core.serialization import default_from_dict, import_class_by_name | ||
|
||
|
||
def deserialize_document_store_in_init_parameters(data: Dict[str, Any], key: str = "document_store") -> Dict[str, Any]:
    """
    Deserializes a generic document store from the init_parameters of a serialized component.

    The document store class is resolved from the `type` entry of its serialized form. If that class
    exposes a `from_dict` attribute it is used for deserialization; otherwise `default_from_dict` is
    used as a fallback. The `data` dictionary is modified in place and also returned.

    :param data:
        The dictionary to deserialize from.
    :param key:
        The key in the `data["init_parameters"]` dictionary where the document store is specified.
    :returns:
        The dictionary, with the document store deserialized.
    :raises DeserializationError:
        If the document store is not properly specified in the serialization data or its type cannot be imported.
    """
    init_params = data.get("init_parameters", {})
    # Guard against `init_parameters: None` (or any non-dict), which would otherwise surface as a raw TypeError.
    if not isinstance(init_params, dict) or key not in init_params:
        raise DeserializationError(f"Missing '{key}' in serialization data")

    doc_store_data = init_params[key]
    # The entry must be a dict carrying a "type"; a None or plain-string entry is malformed serialization
    # data and is reported as such instead of failing later with a TypeError.
    if not isinstance(doc_store_data, dict) or "type" not in doc_store_data:
        raise DeserializationError(f"Missing 'type' in {key} serialization data")

    try:
        doc_store_class = import_class_by_name(doc_store_data["type"])
    except ImportError as e:
        raise DeserializationError(f"Class '{doc_store_data['type']}' not correctly imported") from e

    # Prefer the class-specific deserializer so custom logic in the document store is honoured.
    if hasattr(doc_store_class, "from_dict"):
        init_params[key] = doc_store_class.from_dict(doc_store_data)
    else:
        init_params[key] = default_from_dict(doc_store_class, doc_store_data)

    return data
5 changes: 5 additions & 0 deletions
5
releasenotes/notes/docstore-deserialization-in-init-params-a123a39d5fbc957f.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
enhancements: | ||
- | | ||
Introduce a utility function to deserialize a generic Document Store | ||
from the init_parameters of a serialized component. |
7 changes: 7 additions & 0 deletions
7
releasenotes/notes/use-document-store-from-dict-db7975d0e0e5e451.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
--- | ||
fixes: | ||
- | | ||
For components that support multiple Document Stores, prioritize using the specific `from_dict` class method | ||
for deserialization when available. Otherwise, fall back to the generic `default_from_dict` method. | ||
This impacts the following generic components: `CacheChecker`, `DocumentWriter`, `FilterRetriever`, and | ||
`SentenceWindowRetriever`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]> | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from unittest.mock import patch | ||
import pytest | ||
|
||
from haystack.document_stores.in_memory.document_store import InMemoryDocumentStore | ||
from haystack.utils.docstore_deserialization import deserialize_document_store_in_init_parameters | ||
from haystack.core.errors import DeserializationError | ||
|
||
|
||
class FakeDocumentStore:
    """Bare stand-in for a document store that defines no `from_dict` method of its own."""
|
||
|
||
def test_deserialize_document_store_in_init_parameters():
    """A serialized InMemoryDocumentStore under the default key is replaced by an instance."""
    serialized_store = {
        "type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore",
        "init_parameters": {},
    }
    component_data = {
        "type": "haystack.components.writers.document_writer.DocumentWriter",
        "init_parameters": {"document_store": serialized_store},
    }

    deserialized = deserialize_document_store_in_init_parameters(component_data)

    store = deserialized["init_parameters"]["document_store"]
    assert isinstance(store, InMemoryDocumentStore)
|
||
|
||
def test_from_dict_is_called():
    """If the document store provides a from_dict method, it should be called."""
    store_type = "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore"
    component_data = {
        "type": "haystack.components.writers.document_writer.DocumentWriter",
        "init_parameters": {"document_store": {"type": store_type, "init_parameters": {}}},
    }

    with patch.object(InMemoryDocumentStore, "from_dict") as from_dict_mock:
        deserialize_document_store_in_init_parameters(component_data)

    # The expected payload is a fresh literal so that any mutation of the input would be detected.
    expected_payload = {"type": store_type, "init_parameters": {}}
    from_dict_mock.assert_called_once_with(expected_payload)
|
||
|
||
def test_default_from_dict_is_called():
    """If the document store does not provide a from_dict method, default_from_dict should be called."""
    store_type = "test_docstore_deserialization.FakeDocumentStore"
    component_data = {
        "type": "haystack.components.writers.document_writer.DocumentWriter",
        "init_parameters": {"document_store": {"type": store_type, "init_parameters": {}}},
    }

    # Patch the name as looked up inside the utility module, not where it is defined.
    with patch("haystack.utils.docstore_deserialization.default_from_dict") as fallback_mock:
        deserialize_document_store_in_init_parameters(component_data)

    expected_payload = {"type": store_type, "init_parameters": {}}
    fallback_mock.assert_called_once_with(FakeDocumentStore, expected_payload)
|
||
|
||
def test_missing_document_store_key():
    """Init parameters without a `document_store` entry must raise DeserializationError."""
    init_parameters = {"policy": "SKIP"}
    component_data = {"init_parameters": init_parameters}

    with pytest.raises(DeserializationError):
        deserialize_document_store_in_init_parameters(component_data)
|
||
|
||
def test_missing_type_key_in_document_store():
    """A `document_store` entry that lacks a `type` must raise DeserializationError."""
    store_without_type = {"init_parameters": {}}
    component_data = {"init_parameters": {"document_store": store_without_type, "policy": "SKIP"}}

    with pytest.raises(DeserializationError):
        deserialize_document_store_in_init_parameters(component_data)
|
||
|
||
def test_invalid_class_import():
    """A `type` naming a class that cannot be imported must raise DeserializationError."""
    unresolvable_store = {"type": "invalid.module.InvalidClass", "init_parameters": {}}
    component_data = {"init_parameters": {"document_store": unresolvable_store, "policy": "SKIP"}}

    with pytest.raises(DeserializationError):
        deserialize_document_store_in_init_parameters(component_data)