Skip to content

Commit ae09e9b

Browse files
authored
Merge pull request #2719 from bagerard/shane_skip_index_creation_on_save
[Clone] Breaking change: Improve save() performance by skipping index creation
2 parents 8b62b1f + 7094025 commit ae09e9b

File tree

8 files changed

+162
-46
lines changed

8 files changed

+162
-46
lines changed

benchmarks/test_basic_doc_ops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
StringField,
1313
)
1414

15-
mongoengine.connect(db="mongoengine_benchmark_test")
15+
mongoengine.connect(db="mongoengine_benchmark_test", w=1)
1616

1717

1818
def timeit(f, n=10000):

benchmarks/test_inserts.py

+5-10
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,11 @@ def main():
55
setup = """
66
from pymongo import MongoClient
77
8-
connection = MongoClient()
8+
connection = MongoClient(w=1)
99
connection.drop_database('mongoengine_benchmark_test')
1010
"""
1111

1212
stmt = """
13-
from pymongo import MongoClient
14-
15-
connection = MongoClient()
16-
1713
db = connection.mongoengine_benchmark_test
1814
noddy = db.noddy
1915
@@ -29,13 +25,12 @@ def main():
2925
"""
3026

3127
print("-" * 100)
32-
print("PyMongo: Creating 10000 dictionaries.")
28+
print('PyMongo: Creating 10000 dictionaries (write_concern={"w": 1}).')
3329
t = timeit.Timer(stmt=stmt, setup=setup)
3430
print(f"{t.timeit(1)}s")
3531

3632
stmt = """
37-
from pymongo import MongoClient, WriteConcern
38-
connection = MongoClient()
33+
from pymongo import WriteConcern
3934
4035
db = connection.mongoengine_benchmark_test
4136
noddy = db.noddy.with_options(write_concern=WriteConcern(w=0))
@@ -64,7 +59,7 @@ def main():
6459
connection.close()
6560
6661
from mongoengine import Document, DictField, connect
67-
connect("mongoengine_benchmark_test")
62+
connect("mongoengine_benchmark_test", w=1)
6863
6964
class Noddy(Document):
7065
fields = DictField()
@@ -82,7 +77,7 @@ class Noddy(Document):
8277
"""
8378

8479
print("-" * 100)
85-
print("MongoEngine: Creating 10000 dictionaries.")
80+
print('MongoEngine: Creating 10000 dictionaries (write_concern={"w": 1}).')
8681
t = timeit.Timer(stmt=stmt, setup=setup)
8782
print(f"{t.timeit(1)}s")
8883

benchmarks/test_save_with_indexes.py

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import timeit
2+
3+
4+
def main():
5+
setup = """
6+
from pymongo import MongoClient
7+
8+
connection = MongoClient()
9+
connection.drop_database("mongoengine_benchmark_test")
10+
connection.close()
11+
12+
from mongoengine import connect, Document, IntField, StringField
13+
connect("mongoengine_benchmark_test", w=1)
14+
15+
class User0(Document):
16+
name = StringField()
17+
age = IntField()
18+
19+
class User1(Document):
20+
name = StringField()
21+
age = IntField()
22+
meta = {"indexes": [["name"]]}
23+
24+
class User2(Document):
25+
name = StringField()
26+
age = IntField()
27+
meta = {"indexes": [["name", "age"]]}
28+
29+
class User3(Document):
30+
name = StringField()
31+
age = IntField()
32+
meta = {"indexes": [["name"]], "auto_create_index_on_save": True}
33+
34+
class User4(Document):
35+
name = StringField()
36+
age = IntField()
37+
meta = {"indexes": [["name", "age"]], "auto_create_index_on_save": True}
38+
"""
39+
40+
stmt = """
41+
for i in range(10000):
42+
User0(name="Nunu", age=9).save()
43+
"""
44+
print("-" * 80)
45+
print("Save 10000 documents with 0 indexes.")
46+
t = timeit.Timer(stmt=stmt, setup=setup)
47+
print(f"{min(t.repeat(repeat=3, number=1))}s")
48+
49+
stmt = """
50+
for i in range(10000):
51+
User1(name="Nunu", age=9).save()
52+
"""
53+
print("-" * 80)
54+
print("Save 10000 documents with 1 index.")
55+
t = timeit.Timer(stmt=stmt, setup=setup)
56+
print(f"{min(t.repeat(repeat=3, number=1))}s")
57+
58+
stmt = """
59+
for i in range(10000):
60+
User2(name="Nunu", age=9).save()
61+
"""
62+
print("-" * 80)
63+
print("Save 10000 documents with 2 indexes.")
64+
t = timeit.Timer(stmt=stmt, setup=setup)
65+
print(f"{min(t.repeat(repeat=3, number=1))}s")
66+
67+
stmt = """
68+
for i in range(10000):
69+
User3(name="Nunu", age=9).save()
70+
"""
71+
print("-" * 80)
72+
print("Save 10000 documents with 1 index (auto_create_index_on_save=True).")
73+
t = timeit.Timer(stmt=stmt, setup=setup)
74+
print(f"{min(t.repeat(repeat=3, number=1))}s")
75+
76+
stmt = """
77+
for i in range(10000):
78+
User4(name="Nunu", age=9).save()
79+
"""
80+
print("-" * 80)
81+
print("Save 10000 documents with 2 indexes (auto_create_index_on_save=True).")
82+
t = timeit.Timer(stmt=stmt, setup=setup)
83+
print(f"{min(t.repeat(repeat=3, number=1))}s")
84+
85+
86+
if __name__ == "__main__":
87+
main()

docs/changelog.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,18 @@ Changelog
77
Development
88
===========
99
- (Fill this out as you fix issues and develop your features).
10+
- BREAKING CHANGE: Improved the performance of :meth:`~mongoengine.Document.save()`
11+
by removing the call to :meth:`~mongoengine.Document.ensure_indexes` unless
12+
``meta['auto_create_index_on_save']`` is set to True. With the default settings, Document indexes
13+
will still be created on the fly, during the first usage of the collection (query, insert, etc.);
14+
they will just not be re-created whenever .save() is called.
15+
- Added meta ``auto_create_index_on_save`` so you can enable index creation
16+
on :meth:`~mongoengine.Document.save()` (as it was < 0.26.0).
1017

1118
Changes in 0.25.0
1219
=================
1320
- Support MONGODB-AWS authentication mechanism (with `authmechanismproperties`) #2507
14-
- Turning off dereferencing for the results of distinct query. #2663
21+
- Bug Fix - distinct query doesn't obey the ``no_dereference()``. #2663
1522
- Add tests against Mongo 5.0 in pipeline
1623
- Drop support for Python 3.6 (EOL)
1724
- Bug fix support for PyMongo>=4 to fix "pymongo.errors.InvalidOperation: Cannot use MongoClient after close"

docs/guide/defining-documents.rst

+7-1
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,7 @@ There are a few top level defaults for all indexes that can be set::
574574
'index_background': True,
575575
'index_cls': False,
576576
'auto_create_index': True,
577+
'auto_create_index_on_save': False,
577578
}
578579

579580

@@ -588,10 +589,15 @@ There are a few top level defaults for all indexes that can be set::
588589

589590
:attr:`auto_create_index` (Optional)
590591
When this is True (default), MongoEngine will ensure that the correct
591-
indexes exist in MongoDB each time a command is run. This can be disabled
592+
indexes exist in MongoDB when the Document is first used. This can be disabled
592593
in systems where indexes are managed separately. Disabling this will improve
593594
performance.
594595

596+
:attr:`auto_create_index_on_save` (Optional)
597+
When this is True, MongoEngine will ensure that the correct
598+
indexes exist in MongoDB each time :meth:`~mongoengine.document.Document.save`
599+
is run. Enabling this will degrade performance. The default is False. This
600+
option was added in version 0.26.
595601

596602
Compound Indexes and Indexing sub documents
597603
-------------------------------------------

mongoengine/document.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,7 @@ def _get_collection(cls):
226226
cls._collection = db[collection_name]
227227

228228
# Ensure indexes on the collection unless auto_create_index was
229-
# set to False.
230-
# Also there is no need to ensure indexes on slave.
229+
# set to False. Plus, there is no need to ensure indexes on slave.
231230
db = cls._get_db()
232231
if cls._meta.get("auto_create_index", True) and db.client.is_primary:
233232
cls.ensure_indexes()
@@ -384,6 +383,10 @@ def save(
384383
meta['cascade'] = True. Also you can pass different kwargs to
385384
the cascade save using cascade_kwargs which overwrites the
386385
existing kwargs with custom values.
386+
.. versionchanged:: 0.26
387+
save() no longer calls :meth:`~mongoengine.Document.ensure_indexes`
388+
unless ``meta['auto_create_index_on_save']`` is set to True.
389+
387390
"""
388391
signal_kwargs = signal_kwargs or {}
389392

@@ -407,7 +410,13 @@ def save(
407410
# it might be refreshed by the pre_save_post_validation hook, e.g., for etag generation
408411
doc = self.to_mongo()
409412

410-
if self._meta.get("auto_create_index", True):
413+
# Initialize the Document's underlying pymongo.Collection (+create indexes) if not already initialized
414+
# Important to do this here so that index creation is not wrapped in the try/except block below
415+
# and turned into mongoengine.OperationError
416+
if self._collection is None:
417+
_ = self._get_collection()
418+
elif self._meta.get("auto_create_index_on_save", False):
419+
# ensure_indexes is already called as part of _get_collection, so it only needs to be called explicitly when the collection was already initialized
411420
self.ensure_indexes()
412421

413422
try:
@@ -880,6 +889,10 @@ def ensure_indexes(cls):
880889
Document collection (query, save, etc) so unless you disabled `auto_create_index`, you
881890
shouldn't have to call this manually.
882891
892+
This also gets called upon every call to Document.save if `auto_create_index_on_save` is set to True
893+
894+
If called multiple times, MongoDB will not re-create indexes if they exist already
895+
883896
.. note:: You can disable automatic index creation by setting
884897
`auto_create_index` to False in the documents meta data
885898
"""

tests/document/test_indexes.py

+37-29
Original file line numberDiff line numberDiff line change
@@ -983,44 +983,52 @@ class Book(Document):
983983

984984
def test_indexes_after_database_drop(self):
985985
"""
986-
Test to ensure that indexes are re-created on a collection even
987-
after the database has been dropped.
986+
Test to ensure that indexes are not re-created on a collection
987+
after the database has been dropped unless auto_create_index_on_save
988+
is enabled.
988989
989-
Issue #812
990+
Issue #812 and #1446.
990991
"""
991992
# Use a new connection and database since dropping the database could
992993
# cause concurrent tests to fail.
993-
connection = connect(
994-
db="tempdatabase", alias="test_indexes_after_database_drop"
995-
)
994+
tmp_alias = "test_indexes_after_database_drop"
995+
connection = connect(db="tempdatabase", alias=tmp_alias)
996+
self.addCleanup(connection.drop_database, "tempdatabase")
996997

997998
class BlogPost(Document):
998-
title = StringField()
999999
slug = StringField(unique=True)
1000+
meta = {"db_alias": tmp_alias}
10001001

1001-
meta = {"db_alias": "test_indexes_after_database_drop"}
1002+
BlogPost.drop_collection()
1003+
BlogPost(slug="test").save()
1004+
with pytest.raises(NotUniqueError):
1005+
BlogPost(slug="test").save()
10021006

1003-
try:
1004-
BlogPost.drop_collection()
1005-
1006-
# Create Post #1
1007-
post1 = BlogPost(title="test1", slug="test")
1008-
post1.save()
1009-
1010-
# Drop the Database
1011-
connection.drop_database("tempdatabase")
1012-
1013-
# Re-create Post #1
1014-
post1 = BlogPost(title="test1", slug="test")
1015-
post1.save()
1016-
1017-
# Create Post #2
1018-
post2 = BlogPost(title="test2", slug="test")
1019-
with pytest.raises(NotUniqueError):
1020-
post2.save()
1021-
finally:
1022-
# Drop the temporary database at the end
1023-
connection.drop_database("tempdatabase")
1007+
# Drop the Database
1008+
connection.drop_database("tempdatabase")
1009+
BlogPost(slug="test").save()
1010+
# No error because the index was not recreated after dropping the database.
1011+
BlogPost(slug="test").save()
1012+
1013+
# Repeat with auto_create_index_on_save: True.
1014+
class BlogPost2(Document):
1015+
slug = StringField(unique=True)
1016+
meta = {
1017+
"db_alias": tmp_alias,
1018+
"auto_create_index_on_save": True,
1019+
}
1020+
1021+
BlogPost2.drop_collection()
1022+
BlogPost2(slug="test").save()
1023+
with pytest.raises(NotUniqueError):
1024+
BlogPost2(slug="test").save()
1025+
1026+
# Drop the Database
1027+
connection.drop_database("tempdatabase")
1028+
BlogPost2(slug="test").save()
1029+
# Error because ensure_indexes is run on every save().
1030+
with pytest.raises(NotUniqueError):
1031+
BlogPost2(slug="test").save()
10241032

10251033
def test_index_dont_send_cls_option(self):
10261034
"""

tests/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def _inner(*args, **kwargs):
7171
return func(*args, **kwargs)
7272

7373
pretty_version = ".".join(str(n) for n in mongo_version_req)
74-
pytest.skip(f"Needs MongoDB v{pretty_version}+")
74+
pytest.skip(f"Needs MongoDB {oper.__name__} v{pretty_version}")
7575

7676
_inner.__name__ = func.__name__
7777
_inner.__doc__ = func.__doc__

0 commit comments

Comments
 (0)