diff --git a/benchmarks/test_basic_doc_ops.py b/benchmarks/test_basic_doc_ops.py
index 66b6a17f9..8b8bf4aaf 100644
--- a/benchmarks/test_basic_doc_ops.py
+++ b/benchmarks/test_basic_doc_ops.py
@@ -12,7 +12,7 @@
     StringField,
 )
 
-mongoengine.connect(db="mongoengine_benchmark_test")
+mongoengine.connect(db="mongoengine_benchmark_test", w=1)
 
 
 def timeit(f, n=10000):
diff --git a/benchmarks/test_inserts.py b/benchmarks/test_inserts.py
index dcd18ff88..8e8419933 100644
--- a/benchmarks/test_inserts.py
+++ b/benchmarks/test_inserts.py
@@ -5,15 +5,11 @@ def main():
     setup = """
 from pymongo import MongoClient
 
-connection = MongoClient()
+connection = MongoClient(w=1)
 connection.drop_database('mongoengine_benchmark_test')
 """
 
     stmt = """
-from pymongo import MongoClient
-
-connection = MongoClient()
-
 db = connection.mongoengine_benchmark_test
 noddy = db.noddy
@@ -29,13 +25,12 @@ def main():
 """
 
     print("-" * 100)
-    print("PyMongo: Creating 10000 dictionaries.")
+    print('PyMongo: Creating 10000 dictionaries (write_concern={"w": 1}).')
     t = timeit.Timer(stmt=stmt, setup=setup)
     print(f"{t.timeit(1)}s")
 
     stmt = """
-from pymongo import MongoClient, WriteConcern
-connection = MongoClient()
+from pymongo import WriteConcern
 
 db = connection.mongoengine_benchmark_test
 noddy = db.noddy.with_options(write_concern=WriteConcern(w=0))
@@ -64,7 +59,7 @@ def main():
 connection.close()
 
 from mongoengine import Document, DictField, connect
-connect("mongoengine_benchmark_test")
+connect("mongoengine_benchmark_test", w=1)
 
 class Noddy(Document):
     fields = DictField()
@@ -82,7 +77,7 @@ class Noddy(Document):
 """
 
     print("-" * 100)
-    print("MongoEngine: Creating 10000 dictionaries.")
+    print('MongoEngine: Creating 10000 dictionaries (write_concern={"w": 1}).')
     t = timeit.Timer(stmt=stmt, setup=setup)
     print(f"{t.timeit(1)}s")
diff --git a/benchmarks/test_save_with_indexes.py b/benchmarks/test_save_with_indexes.py
new file mode 100644
index 000000000..86e281cb3
--- /dev/null
+++ b/benchmarks/test_save_with_indexes.py
@@ -0,0 +1,87 @@
+import timeit
+
+
+def main():
+    setup = """
+from pymongo import MongoClient
+
+connection = MongoClient()
+connection.drop_database("mongoengine_benchmark_test")
+connection.close()
+
+from mongoengine import connect, Document, IntField, StringField
+connect("mongoengine_benchmark_test", w=1)
+
+class User0(Document):
+    name = StringField()
+    age = IntField()
+
+class User1(Document):
+    name = StringField()
+    age = IntField()
+    meta = {"indexes": [["name"]]}
+
+class User2(Document):
+    name = StringField()
+    age = IntField()
+    meta = {"indexes": [["name", "age"]]}
+
+class User3(Document):
+    name = StringField()
+    age = IntField()
+    meta = {"indexes": [["name"]], "auto_create_index_on_save": True}
+
+class User4(Document):
+    name = StringField()
+    age = IntField()
+    meta = {"indexes": [["name", "age"]], "auto_create_index_on_save": True}
+"""
+
+    stmt = """
+for i in range(10000):
+    User0(name="Nunu", age=9).save()
+"""
+    print("-" * 80)
+    print("Save 10000 documents with 0 indexes.")
+    t = timeit.Timer(stmt=stmt, setup=setup)
+    print(f"{min(t.repeat(repeat=3, number=1))}s")
+
+    stmt = """
+for i in range(10000):
+    User1(name="Nunu", age=9).save()
+"""
+    print("-" * 80)
+    print("Save 10000 documents with 1 index.")
+    t = timeit.Timer(stmt=stmt, setup=setup)
+    print(f"{min(t.repeat(repeat=3, number=1))}s")
+
+    stmt = """
+for i in range(10000):
+    User2(name="Nunu", age=9).save()
+"""
+    print("-" * 80)
+    print("Save 10000 documents with 2 indexes.")
+    t = timeit.Timer(stmt=stmt, setup=setup)
+    print(f"{min(t.repeat(repeat=3, number=1))}s")
+
+    stmt = """
+for i in range(10000):
+    User3(name="Nunu", age=9).save()
+"""
+    print("-" * 80)
+    print("Save 10000 documents with 1 index (auto_create_index_on_save=True).")
+    t = timeit.Timer(stmt=stmt, setup=setup)
+    print(f"{min(t.repeat(repeat=3, number=1))}s")
+
+    stmt = """
+for i in range(10000):
+    User4(name="Nunu", age=9).save()
+"""
+    print("-" * 80)
+    print("Save 10000 documents with 2 indexes (auto_create_index_on_save=True).")
+    t = timeit.Timer(stmt=stmt, setup=setup)
+    print(f"{min(t.repeat(repeat=3, number=1))}s")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/changelog.rst b/docs/changelog.rst
index fd39f96e5..3182cabf6 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -7,11 +7,18 @@ Changelog
 Development
 ===========
 - (Fill this out as you fix issues and develop your features).
+- BREAKING CHANGE: Improved the performance of :meth:`~mongoengine.Document.save()`
+  by removing the call to :meth:`~mongoengine.Document.ensure_indexes` unless
+  ``meta['auto_create_index_on_save']`` is set to True. With the default settings, Document indexes
+  will still be created on the fly, during the first usage of the collection (query, insert, etc),
+  they will just not be re-created whenever .save() is called.
+- Added meta ``auto_create_index_on_save`` so you can enable index creation
+  on :meth:`~mongoengine.Document.save()` (as it was < 0.26.0).
 
 Changes in 0.25.0
 =================
 - Support MONGODB-AWS authentication mechanism (with `authmechanismproperties`) #2507
-- Turning off dereferencing for the results of distinct query. #2663
+- Bug Fix - distinct query doesn't obey the ``no_dereference()``. #2663
 - Add tests against Mongo 5.0 in pipeline
 - Drop support for Python 3.6 (EOL)
 - Bug fix support for PyMongo>=4 to fix "pymongo.errors.InvalidOperation: Cannot use MongoClient after close"
diff --git a/docs/guide/defining-documents.rst b/docs/guide/defining-documents.rst
index 53758672a..df749ee1e 100644
--- a/docs/guide/defining-documents.rst
+++ b/docs/guide/defining-documents.rst
@@ -574,6 +574,7 @@ There are a few top level defaults for all indexes that can be set::
         'index_background': True,
         'index_cls': False,
         'auto_create_index': True,
+        'auto_create_index_on_save': False,
     }
 
 
@@ -588,10 +589,15 @@ There are a few top level defaults for all indexes that can be set::
 
 :attr:`auto_create_index` (Optional)
     When this is True (default), MongoEngine will ensure that the correct
-    indexes exist in MongoDB each time a command is run. This can be disabled
+    indexes exist in MongoDB when the Document is first used. This can be disabled
     in systems where indexes are managed separately. Disabling this will improve
     performance.
 
+:attr:`auto_create_index_on_save` (Optional)
+    When this is True, MongoEngine will ensure that the correct
+    indexes exist in MongoDB each time :meth:`~mongoengine.document.Document.save`
+    is run. Enabling this will degrade performance. The default is False. This
+    option was added in version 0.26.
 
 Compound Indexes and Indexing sub documents
 -------------------------------------------
diff --git a/mongoengine/document.py b/mongoengine/document.py
index e7a1938f2..7e61af5a1 100644
--- a/mongoengine/document.py
+++ b/mongoengine/document.py
@@ -226,8 +226,7 @@ def _get_collection(cls):
                 cls._collection = db[collection_name]
 
             # Ensure indexes on the collection unless auto_create_index was
-            # set to False.
-            # Also there is no need to ensure indexes on slave.
+            # set to False. Plus, there is no need to ensure indexes on slave.
             db = cls._get_db()
             if cls._meta.get("auto_create_index", True) and db.client.is_primary:
                 cls.ensure_indexes()
@@ -384,6 +383,10 @@ def save(
             meta['cascade'] = True. Also you can pass different kwargs to
             the cascade save using cascade_kwargs which overwrites the
             existing kwargs with custom values.
+        .. versionchanged:: 0.26
+            save() no longer calls :meth:`~mongoengine.Document.ensure_indexes`
+            unless ``meta['auto_create_index_on_save']`` is set to True.
+
         """
 
         signal_kwargs = signal_kwargs or {}
@@ -407,7 +410,13 @@ def save(
         # it might be refreshed by the pre_save_post_validation hook, e.g., for etag generation
         doc = self.to_mongo()
 
-        if self._meta.get("auto_create_index", True):
+        # Initialize the Document's underlying pymongo.Collection (+create indexes) if not already initialized
+        # Important to do this here to avoid that the index creation gets wrapped in the try/except block below
+        # and turned into mongoengine.OperationError
+        if self._collection is None:
+            _ = self._get_collection()
+        elif self._meta.get("auto_create_index_on_save", False):
+            # ensure_indexes is called as part of _get_collection so no need to re-call it again here
             self.ensure_indexes()
 
         try:
@@ -880,6 +889,10 @@ def ensure_indexes(cls):
         Document collection (query, save, etc) so unless you disabled `auto_create_index`,
         you shouldn't have to call this manually.
 
+        This also gets called upon every call to Document.save if `auto_create_index_on_save` is set to True
+
+        If called multiple times, MongoDB will not re-create indexes if they exist already
+
         .. note:: You can disable automatic index creation by setting
             `auto_create_index` to False in the documents meta data
         """
diff --git a/tests/document/test_indexes.py b/tests/document/test_indexes.py
index 4d56f8553..f367cd0f9 100644
--- a/tests/document/test_indexes.py
+++ b/tests/document/test_indexes.py
@@ -983,44 +983,52 @@ class Book(Document):
 
     def test_indexes_after_database_drop(self):
         """
-        Test to ensure that indexes are re-created on a collection even
-        after the database has been dropped.
+        Test to ensure that indexes are not re-created on a collection
+        after the database has been dropped unless auto_create_index_on_save
+        is enabled.
 
-        Issue #812
+        Issue #812 and #1446.
         """
         # Use a new connection and database since dropping the database could
         # cause concurrent tests to fail.
-        connection = connect(
-            db="tempdatabase", alias="test_indexes_after_database_drop"
-        )
+        tmp_alias = "test_indexes_after_database_drop"
+        connection = connect(db="tempdatabase", alias=tmp_alias)
+        self.addCleanup(connection.drop_database, "tempdatabase")
 
         class BlogPost(Document):
-            title = StringField()
             slug = StringField(unique=True)
+            meta = {"db_alias": tmp_alias}
 
-            meta = {"db_alias": "test_indexes_after_database_drop"}
+        BlogPost.drop_collection()
+        BlogPost(slug="test").save()
+        with pytest.raises(NotUniqueError):
+            BlogPost(slug="test").save()
 
-        try:
-            BlogPost.drop_collection()
-
-            # Create Post #1
-            post1 = BlogPost(title="test1", slug="test")
-            post1.save()
-
-            # Drop the Database
-            connection.drop_database("tempdatabase")
-
-            # Re-create Post #1
-            post1 = BlogPost(title="test1", slug="test")
-            post1.save()
-
-            # Create Post #2
-            post2 = BlogPost(title="test2", slug="test")
-            with pytest.raises(NotUniqueError):
-                post2.save()
-        finally:
-            # Drop the temporary database at the end
-            connection.drop_database("tempdatabase")
+        # Drop the Database
+        connection.drop_database("tempdatabase")
+        BlogPost(slug="test").save()
+        # No error because the index was not recreated after dropping the database.
+        BlogPost(slug="test").save()
+
+        # Repeat with auto_create_index_on_save: True.
+        class BlogPost2(Document):
+            slug = StringField(unique=True)
+            meta = {
+                "db_alias": tmp_alias,
+                "auto_create_index_on_save": True,
+            }
+
+        BlogPost2.drop_collection()
+        BlogPost2(slug="test").save()
+        with pytest.raises(NotUniqueError):
+            BlogPost2(slug="test").save()
+
+        # Drop the Database
+        connection.drop_database("tempdatabase")
+        BlogPost2(slug="test").save()
+        # Error because ensure_indexes is run on every save().
+        with pytest.raises(NotUniqueError):
+            BlogPost2(slug="test").save()
 
     def test_index_dont_send_cls_option(self):
         """
diff --git a/tests/utils.py b/tests/utils.py
index 0dcdb2dbf..7d0eb33f2 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -71,7 +71,7 @@ def _inner(*args, **kwargs):
             return func(*args, **kwargs)
 
         pretty_version = ".".join(str(n) for n in mongo_version_req)
-        pytest.skip(f"Needs MongoDB v{pretty_version}+")
+        pytest.skip(f"Needs MongoDB {oper.__name__} v{pretty_version}")
 
     _inner.__name__ = func.__name__
     _inner.__doc__ = func.__doc__
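
For orientation, a minimal usage sketch of the ``auto_create_index_on_save`` option this patch introduces. It is only an illustration, not part of the diff: the ``Article``/``AuditedArticle`` documents, the database name, and the local connection are assumptions.

    # Illustrative only: the document classes, database name and local
    # connection below are assumptions, not part of the patch above.
    from mongoengine import Document, StringField, connect

    connect(db="mongoengine_example", w=1)


    class Article(Document):
        slug = StringField()
        # Default behaviour after this change: the index is created once,
        # when the collection is first used, but .save() no longer re-runs
        # ensure_indexes() on every call.
        meta = {"indexes": [["slug"]]}


    class AuditedArticle(Document):
        slug = StringField()
        # Opt back into the pre-0.26 behaviour: ensure_indexes() runs on
        # every .save(), at a performance cost.
        meta = {"indexes": [["slug"]], "auto_create_index_on_save": True}


    Article(slug="a").save()         # indexes ensured only via _get_collection()
    AuditedArticle(slug="b").save()  # indexes ensured again on every save()

As the updated test above shows, only documents with ``auto_create_index_on_save: True`` get their unique indexes recreated when the database is dropped between saves.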