diff --git a/django_mongodb_backend/indexes.py b/django_mongodb_backend/indexes.py
index 99c1ef5f3..7bbca8b8f 100644
--- a/django_mongodb_backend/indexes.py
+++ b/django_mongodb_backend/indexes.py
@@ -109,9 +109,50 @@ class SearchIndex(Index):
     suffix = "six"
     _error_id_prefix = "django_mongodb_backend.indexes.SearchIndex"
 
-    def __init__(self, *, fields=(), name=None):
+    def __init__(
+        self, *, fields=(), field_mappings=None, name=None, analyzer=None, search_analyzer=None
+    ):
+        """
+        Define the index from ``fields`` (search types are inferred from each
+        field's db_type) or from ``field_mappings`` (a dict of field name ->
+        explicit Atlas Search mapping), but not both. ``analyzer`` and
+        ``search_analyzer`` are optional strings.
+
+        Raise ValueError if an argument has the wrong type or if both
+        ``fields`` and ``field_mappings`` are given.
+        """
+        # Compare against None (like the analyzer checks below) so that falsy
+        # non-dict values such as [] or "" are rejected rather than stored.
+        if field_mappings is not None and not isinstance(field_mappings, dict):
+            raise ValueError(
+                "field_mappings must be a dictionary mapping field names to their "
+                "Atlas Search field mappings."
+            )
+        if analyzer is not None and not isinstance(analyzer, str):
+            raise ValueError(f"analyzer must be a string. got type: {type(analyzer)}")
+        if search_analyzer is not None and not isinstance(search_analyzer, str):
+            raise ValueError(f"search_analyzer must be a string. got type: {type(search_analyzer)}")
+        self.field_mappings = field_mappings
+        self.analyzer = analyzer
+        self.search_analyzer = search_analyzer
+        if field_mappings:
+            if fields:
+                raise ValueError("Cannot provide fields and fields_mappings")
+            # The mapping's keys double as the index's field names.
+            fields = list(field_mappings)
         super().__init__(fields=fields, name=name)
 
+    def deconstruct(self):
+        """Extend Index.deconstruct() with the search options that are not None."""
+        path, args, kwargs = super().deconstruct()
+        if self.field_mappings is not None:
+            kwargs["field_mappings"] = self.field_mappings
+        if self.analyzer is not None:
+            kwargs["analyzer"] = self.analyzer
+        if self.search_analyzer is not None:
+            kwargs["search_analyzer"] = self.search_analyzer
+        return path, args, kwargs
+
     def check(self, model, connection):
         errors = []
         if not connection.features.supports_atlas_search:
@@ -152,12 +193,21 @@ def get_pymongo_index_model(
             return None
         fields = {}
         for field_name, _ in self.fields_orders:
-            field = model._meta.get_field(field_name)
-            type_ = self.search_index_data_types(field.db_type(schema_editor.connection))
             field_path = column_prefix + model._meta.get_field(field_name).column
-            fields[field_path] = {"type": type_}
+            if self.field_mappings:
+                fields[field_path] = self.field_mappings[field_name]
+            else:
+                field = model._meta.get_field(field_name)
+                type_ = self.search_index_data_types(field.db_type(schema_editor.connection))
+                fields[field_path] = {"type": type_}
+        analyzers = {}
+        if self.analyzer is not None:
+            analyzers["analyzer"] = self.analyzer
+        if self.search_analyzer is not None:
+            analyzers["searchAnalyzer"] = self.search_analyzer
         return SearchIndexModel(
-            definition={"mappings": {"dynamic": False, "fields": fields}}, name=self.name
+            definition={"mappings": {"dynamic": False, "fields": fields}, **analyzers},
+            name=self.name,
         )
 
 
diff --git a/django_mongodb_backend/introspection.py b/django_mongodb_backend/introspection.py
index 714f22465..7428a0b74 100644
--- a/django_mongodb_backend/introspection.py
+++ b/django_mongodb_backend/introspection.py
@@ -44,8 +44,12 @@ def _get_search_index_info(self, table_name):
                 type_ = VectorSearchIndex.suffix
                 options = details
             else:
-                options = details["latestDefinition"]["mappings"]
-                columns = list(options.get("fields", {}).keys())
+                options = {
+                    "analyzer": details["latestDefinition"].get("analyzer"),
+                    "searchAnalyzer": details["latestDefinition"].get("searchAnalyzer"),
+                    "mappings": details["latestDefinition"]["mappings"],
+                }
+                columns = list(options["mappings"].get("fields", {}).keys())
                 type_ = SearchIndex.suffix
             constraints[details["name"]] = {
                 "check": False,
diff --git a/docs/ref/models/indexes.rst b/docs/ref/models/indexes.rst
index 984177687..ba3f218ef 100644
--- a/docs/ref/models/indexes.rst
+++ b/docs/ref/models/indexes.rst
@@ -26,7 +26,7 @@ minutes, depending on the size of the collection.
 ``SearchIndex``
 ---------------
 
-.. class:: SearchIndex(fields=(), name=None)
+.. class:: SearchIndex(fields=(), field_mappings=None, name=None, analyzer=None, search_analyzer=None)
 
     Creates a basic :doc:`search index ` on the given field(s).
 
@@ -35,12 +35,29 @@ minutes, depending on the size of the collection. supported. See the :ref:`Atlas documentation ` for a complete list of unsupported data types. + Use ``field_mappings`` (instead of ``fields``) to create an advanced search + index. ``field_mappings`` is a dictionary that maps field names to index + options (see ``definition["mappings"]["fields"]`` in the + :ref:`atlas:fts-static-mapping-example`). + If ``name`` isn't provided, one will be generated automatically. If you need to reference the name in your search query and don't provide your own name, you can lookup the generated one using ``Model._meta.indexes[0].name`` (substituting the name of your model as well as a different list index if your model has multiple indexes). + Use ``analyzer`` or ``search_analyzer`` to configure the + indexing and searching analyzer, respectively, for + the search index definition. If these arguments are not provided, + they will default to ``lucene.standard`` at the server level. + (See ``definition["analyzer"]`` + and ``definition["searchAnalyzer"]`` + in the :ref:`atlas:fts-static-mapping-example`.) + + .. versionchanged:: 5.2.2 + + The ``field_mappings``, ``analyzer``, and ``search_analyzer`` arguments were added. + ``VectorSearchIndex`` --------------------- diff --git a/docs/releases/5.2.x.rst b/docs/releases/5.2.x.rst index c2a559f3f..e2e0b8c4d 100644 --- a/docs/releases/5.2.x.rst +++ b/docs/releases/5.2.x.rst @@ -10,7 +10,8 @@ Django MongoDB Backend 5.2.x New features ------------ -- ... +- Added the ``field_mappings``, ``analyzer``, and ``search_analyzer`` + arguments to :class:`.SearchIndex` to allow creating advanced indexes. 
Bug fixes --------- diff --git a/tests/atlas_search_/test_search.py b/tests/atlas_search_/test_search.py index b1102a742..192c231d0 100644 --- a/tests/atlas_search_/test_search.py +++ b/tests/atlas_search_/test_search.py @@ -8,7 +8,6 @@ from django.db.models.query import QuerySet from django.db.utils import DatabaseError from django.test import TransactionTestCase, skipUnlessDBFeature -from pymongo.operations import SearchIndexModel from django_mongodb_backend.expressions import ( CompoundExpression, @@ -28,7 +27,7 @@ SearchVector, SearchWildcard, ) -from django_mongodb_backend.schema import DatabaseSchemaEditor +from django_mongodb_backend.indexes import SearchIndex, VectorSearchIndex from .models import Article, Location, Writer @@ -75,22 +74,15 @@ class SearchUtilsMixin(TransactionTestCase): assertListEqual = _delayed_assertion(timeout=2)(TransactionTestCase.assertListEqual) assertQuerySetEqual = _delayed_assertion(timeout=2)(TransactionTestCase.assertQuerySetEqual) - @staticmethod - def _get_collection(model): - return connection.database.get_collection(model._meta.db_table) - @classmethod - def create_search_index(cls, model, index_name, definition, type="search"): - # TODO: create/delete indexes using DatabaseSchemaEditor when - # SearchIndexes support mappings (INTPYTHON-729). 
- collection = cls._get_collection(model) - idx = SearchIndexModel(definition=definition, name=index_name, type=type) - collection.create_search_index(idx) - DatabaseSchemaEditor.wait_until_index_created(collection, index_name) + def create_search_index(cls, model, index_name, definition, index_cls=SearchIndex): + idx = index_cls(field_mappings=definition, name=index_name) + with connection.schema_editor() as editor: + editor.add_index(model, idx) def drop_index(): - collection.drop_search_index(index_name) - DatabaseSchemaEditor.wait_until_index_dropped(collection, index_name) + with connection.schema_editor() as editor: + editor.remove_index(model, idx) cls.addClassCleanup(drop_index) @@ -101,12 +93,7 @@ def setUpClass(cls): cls.create_search_index( Article, "equals_headline_index", - { - "mappings": { - "dynamic": False, - "fields": {"headline": {"type": "token"}, "number": {"type": "number"}}, - } - }, + {"headline": {"type": "token"}, "number": {"type": "number"}}, ) def setUp(self): @@ -167,32 +154,27 @@ def setUpClass(cls): Article, "autocomplete_headline_index", { - "mappings": { - "dynamic": False, + "headline": { + "type": "autocomplete", + "analyzer": "lucene.standard", + "tokenization": "edgeGram", + "minGrams": 3, + "maxGrams": 5, + "foldDiacritics": False, + }, + "writer": { + "type": "document", "fields": { - "headline": { + "name": { "type": "autocomplete", "analyzer": "lucene.standard", "tokenization": "edgeGram", "minGrams": 3, "maxGrams": 5, "foldDiacritics": False, - }, - "writer": { - "type": "document", - "fields": { - "name": { - "type": "autocomplete", - "analyzer": "lucene.standard", - "tokenization": "edgeGram", - "minGrams": 3, - "maxGrams": 5, - "foldDiacritics": False, - } - }, - }, + } }, - } + }, }, ) @@ -253,7 +235,7 @@ def setUpClass(cls): cls.create_search_index( Article, "exists_body_index", - {"mappings": {"dynamic": False, "fields": {"body": {"type": "token"}}}}, + {"body": {"type": "token"}}, ) def setUp(self): @@ -282,7 +264,7 
@@ def setUpClass(cls): cls.create_search_index( Article, "in_headline_index", - {"mappings": {"dynamic": False, "fields": {"headline": {"type": "token"}}}}, + {"headline": {"type": "token"}}, ) def setUp(self): @@ -316,7 +298,7 @@ def setUpClass(cls): cls.create_search_index( Article, "phrase_body_index", - {"mappings": {"dynamic": False, "fields": {"body": {"type": "string"}}}}, + {"body": {"type": "string"}}, ) def setUp(self): @@ -356,13 +338,8 @@ def setUpClass(cls): Article, "query_string_index", { - "mappings": { - "dynamic": False, - "fields": { - "headline": {"type": "string"}, - "body": {"type": "string"}, - }, - } + "headline": {"type": "string"}, + "body": {"type": "string"}, }, ) @@ -416,7 +393,7 @@ def setUpClass(cls): cls.create_search_index( Article, "range_number_index", - {"mappings": {"dynamic": False, "fields": {"number": {"type": "number"}}}}, + {"number": {"type": "number"}}, ) Article.objects.create(headline="x", number=5, body="z") @@ -453,12 +430,7 @@ def setUpClass(cls): cls.create_search_index( Article, "regex_headline_index", - { - "mappings": { - "dynamic": False, - "fields": {"headline": {"type": "string", "analyzer": "lucene.keyword"}}, - } - }, + {"headline": {"type": "string", "analyzer": "lucene.keyword"}}, ) def setUp(self): @@ -498,7 +470,7 @@ def setUpClass(cls): cls.create_search_index( Article, "text_body_index", - {"mappings": {"dynamic": False, "fields": {"body": {"type": "string"}}}}, + {"body": {"type": "string"}}, ) def setUp(self): @@ -560,12 +532,7 @@ def setUpClass(cls): cls.create_search_index( Article, "wildcard_headline_index", - { - "mappings": { - "dynamic": False, - "fields": {"headline": {"type": "string", "analyzer": "lucene.keyword"}}, - } - }, + {"headline": {"type": "string", "analyzer": "lucene.keyword"}}, ) def setUp(self): @@ -603,12 +570,7 @@ def setUpClass(cls): cls.create_search_index( Article, "geoshape_location_index", - { - "mappings": { - "dynamic": False, - "fields": {"location": {"type": "geo", 
"indexShapes": True}}, - } - }, + {"location": {"type": "geo", "indexShapes": True}}, ) def setUp(self): @@ -668,7 +630,7 @@ def setUpClass(cls): cls.create_search_index( Article, "geowithin_location_index", - {"mappings": {"dynamic": False, "fields": {"location": {"type": "geo"}}}}, + {"location": {"type": "geo"}}, ) def setUp(self): @@ -743,12 +705,7 @@ def setUpClass(cls): cls.create_search_index( Article, "mlt_index", - { - "mappings": { - "dynamic": False, - "fields": {"body": {"type": "string"}, "headline": {"type": "string"}}, - } - }, + {"body": {"type": "string"}, "headline": {"type": "string"}}, ) cls.article1 = Article.objects.create( headline="Space exploration", number=1, body="Webb telescope" @@ -782,14 +739,9 @@ def setUpClass(cls): Article, "compound_index", { - "mappings": { - "dynamic": False, - "fields": { - "headline": [{"type": "token"}, {"type": "string"}], - "body": {"type": "string"}, - "number": {"type": "number"}, - }, - } + "headline": [{"type": "token"}, {"type": "string"}], + "body": {"type": "string"}, + "number": {"type": "number"}, }, ) @@ -962,26 +914,20 @@ def test_str_returns_expected_format(self): class SearchVectorTests(SearchUtilsMixin): @classmethod def setUpClass(cls): - cls.create_search_index( - Article, - "vector_index", - { - "fields": [ - { - "type": "vector", - "path": "plot_embedding", - "numDimensions": 3, - "similarity": "cosine", - "quantization": "scalar", - }, - { - "type": "filter", - "path": "number", - }, - ] - }, - type="vectorSearch", + model = Article + idx = VectorSearchIndex( + fields=["plot_embedding", "number"], + name="vector_index", + similarities="cosine", ) + with connection.schema_editor() as editor: + editor.add_index(model, idx) + + def drop_index(): + with connection.schema_editor() as editor: + editor.remove_index(model, idx) + + cls.addClassCleanup(drop_index) def setUp(self): self.mars = Article.objects.create( diff --git a/tests/indexes_/test_search_indexes.py 
b/tests/indexes_/test_search_indexes.py index 59f2fa532..4ceb96f7d 100644 --- a/tests/indexes_/test_search_indexes.py +++ b/tests/indexes_/test_search_indexes.py @@ -53,6 +53,29 @@ def test_no_extra_kargs(self): with self.assertRaisesMessage(TypeError, msg): SearchIndex(condition="") + def test_fields_and_field_mappings(self): + msg = "Cannot provide fields and fields_mappings" + with self.assertRaisesMessage(ValueError, msg): + SearchIndex(fields=["foo"], field_mappings={"foo": {}}) + + def test_field_mappings_type(self): + msg = ( + "field_mappings must be a dictionary mapping field names to their " + "Atlas Search field mappings." + ) + with self.assertRaisesMessage(ValueError, msg): + SearchIndex(field_mappings={"foo"}) + + def test_analyzer_type(self): + msg = "analyzer must be a string. got type: " + with self.assertRaisesMessage(ValueError, msg): + SearchIndex(analyzer=42) + + def test_search_analyzer_type(self): + msg = "search_analyzer must be a string. got type: " + with self.assertRaisesMessage(ValueError, msg): + SearchIndex(search_analyzer=["foo"]) + class VectorSearchIndexTests(SimpleTestCase): def test_no_init_args(self): @@ -179,6 +202,77 @@ def test_valid_fields(self): }, } self.assertCountEqual(index_info[index.name]["columns"], index.fields) + self.assertEqual(index_info[index.name]["options"]["mappings"], expected_options) + finally: + with connection.schema_editor() as editor: + editor.remove_index(index=index, model=SearchIndexTestModel) + + def test_field_mappings(self): + index = SearchIndex( + name="recent_test_idx", + field_mappings={ + "char": { + "indexOptions": "offsets", + "norms": "include", + "store": True, + "type": "string", + } + }, + ) + with connection.schema_editor() as editor: + editor.add_index(index=index, model=SearchIndexTestModel) + try: + index_info = connection.introspection.get_constraints( + cursor=None, + table_name=SearchIndexTestModel._meta.db_table, + ) + expected_options = { + "dynamic": False, + "fields": { + 
"char": { + "indexOptions": "offsets", + "norms": "include", + "store": True, + "type": "string", + } + }, + } + self.assertCountEqual(index_info[index.name]["columns"], index.fields) + self.assertEqual(index_info[index.name]["options"]["mappings"], expected_options) + finally: + with connection.schema_editor() as editor: + editor.remove_index(index=index, model=SearchIndexTestModel) + + def test_analyzer_inclusion(self): + index = SearchIndex( + name="recent_test_idx", + fields=["char"], + analyzer="lucene.simple", + search_analyzer="lucene.simple", + ) + with connection.schema_editor() as editor: + editor.add_index(index=index, model=SearchIndexTestModel) + try: + index_info = connection.introspection.get_constraints( + cursor=None, + table_name=SearchIndexTestModel._meta.db_table, + ) + expected_options = { + "analyzer": "lucene.simple", + "searchAnalyzer": "lucene.simple", + "mappings": { + "dynamic": False, + "fields": { + "char": { + "indexOptions": "offsets", + "norms": "include", + "store": True, + "type": "string", + } + }, + }, + } + self.assertCountEqual(index_info[index.name]["columns"], index.fields) self.assertEqual(index_info[index.name]["options"], expected_options) finally: with connection.schema_editor() as editor: