16 changes: 8 additions & 8 deletions django_mongodb_backend/aggregates.py
@@ -24,7 +24,7 @@ def aggregate(
node.set_source_expressions([Case(condition), *source_expressions[1:]])
else:
node = self
lhs_mql = process_lhs(node, compiler, connection)
lhs_mql = process_lhs(node, compiler, connection, as_expr=True)
if resolve_inner_expression:
return lhs_mql
operator = operator or MONGO_AGGREGATIONS.get(self.__class__, self.function.lower())
@@ -46,9 +46,9 @@ def count(self, compiler, connection, resolve_inner_expression=False, **extra_co
self.filter, then=Case(When(IsNull(source_expressions[0], False), then=Value(1)))
)
node.set_source_expressions([Case(condition), *source_expressions[1:]])
inner_expression = process_lhs(node, compiler, connection)
inner_expression = process_lhs(node, compiler, connection, as_expr=True)
else:
lhs_mql = process_lhs(self, compiler, connection)
lhs_mql = process_lhs(self, compiler, connection, as_expr=True)
null_cond = {"$in": [{"$type": lhs_mql}, ["missing", "null"]]}
inner_expression = {
"$cond": {"if": null_cond, "then": None, "else": lhs_mql if self.distinct else 1}
@@ -58,7 +58,7 @@ def count(self, compiler, connection, resolve_inner_expression=False, **extra_co
return {"$sum": inner_expression}
# If distinct=True or resolve_inner_expression=False, sum the size of the
# set.
lhs_mql = process_lhs(self, compiler, connection)
lhs_mql = process_lhs(self, compiler, connection, as_expr=True)
# None shouldn't be counted, so subtract 1 if it's present.
exits_null = {"$cond": {"if": {"$in": [{"$literal": None}, lhs_mql]}, "then": -1, "else": 0}}
return {"$add": [{"$size": lhs_mql}, exits_null]}
@@ -73,7 +73,7 @@ def stddev_variance(self, compiler, connection, **extra_context):


def register_aggregates():
Aggregate.as_mql = aggregate
Count.as_mql = count
StdDev.as_mql = stddev_variance
Variance.as_mql = stddev_variance
Aggregate.as_mql_expr = aggregate
Count.as_mql_expr = count
StdDev.as_mql_expr = stddev_variance
Variance.as_mql_expr = stddev_variance
Comment on lines +76 to +79
Collaborator
I see we have as_mql_expr(), as_mql_path(), and as_mql(..., as_path=...). If this is the way we keep it, it would be good to explain in the design document which objects (aggregate, func, expression, etc.) get which.

I wonder about renaming as_mql_expr() or as_mql_path() to as_mql() (i.e. treating one of the two as the default). Do you think that would be more or less confusing?

Collaborator Author
Yes, that was the idea. I’ll explain it in the docs, and we might also consider renaming some methods. The core concept is:

  • Every expression has an as_mql method.
  • In some cases, it’s simpler to implement as_mql directly, so those methods don’t follow the common expression flow.
  • For other expressions, as_mql is a composite function that delegates to as_path or as_expr when applied.
  • The base_expression.as_mql method controls when these are called and performs boilerplate checks to prevent nesting an expr inside another expr (a MongoDB 6 restriction).

In short: every object has as_mql. Some also define as_path and as_expr. The base_expression coordinates how these methods are used, except for cases where as_mql is defined directly.

Collaborator Author
@WaVEV Sep 27, 2025
Doc here: link

Contributor
@timgraham I actually like the decoupling of as_mql and as_mql_(path | expr). I view it as: you need to define at least two, as_mql and as_mql_expr.

Then, if you want the more optimized function, you define as_mql_path. It feels less confusing to me that way.

Collaborator Author
@WaVEV Oct 6, 2025
🤔 An expression needs at least one of the methods; as_mql_expr alone is enough (not optimal, but functional). The aggregation module is a good example. Because as_mql is defined on the BaseExpression class, most expressions don't need their own as_mql (and if as_mql is defined directly, neither expr nor path is needed).
EDIT: Sorry, I rushed the answer before reading the full text; you got the point well. 😬
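
To make the thread easier to follow, here is a rough sketch of the dispatch being described. It is an illustration only, under assumed names (BaseExpressionSketch, Literal42); the real backend patches Django's expression classes, but the as_mql / as_mql_expr / as_mql_path split follows this shape:

```python
# Illustrative sketch only -- not the actual backend implementation.
class BaseExpressionSketch:
    def as_mql(self, compiler, connection, as_expr=False, **extra):
        # Shared entry point: use the path-based translation when the caller
        # doesn't require aggregation ($expr) syntax and the expression
        # provides the optimized variant; otherwise fall back to as_mql_expr.
        # The real method also performs checks to avoid nesting an expr
        # inside another expr (the MongoDB 6 restriction mentioned above).
        if not as_expr and hasattr(self, "as_mql_path"):
            return self.as_mql_path(compiler, connection, **extra)
        return self.as_mql_expr(compiler, connection, **extra)


class Literal42(BaseExpressionSketch):
    # A leaf expression only has to define as_mql_expr; as_mql is inherited,
    # and as_mql_path is an optional optimization.
    def as_mql_expr(self, compiler, connection, **extra):
        return {"$literal": 42}
```

Expressions that define as_mql directly bypass this dispatch entirely; the aggregates above instead plug in at the as_mql_expr level via register_aggregates().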

90 changes: 71 additions & 19 deletions django_mongodb_backend/base.py
@@ -2,7 +2,8 @@
import logging
import os

from django.core.exceptions import ImproperlyConfigured
from bson import Decimal128
from django.core.exceptions import EmptyResultSet, FullResultSet, ImproperlyConfigured
from django.db import DEFAULT_DB_ALIAS
from django.db.backends.base.base import BaseDatabaseWrapper
from django.db.backends.utils import debug_transaction
@@ -20,7 +21,7 @@
from .features import DatabaseFeatures
from .introspection import DatabaseIntrospection
from .operations import DatabaseOperations
from .query_utils import regex_match
from .query_utils import regex_expr, regex_match
from .schema import DatabaseSchemaEditor
from .utils import OperationDebugWrapper
from .validation import DatabaseValidation
@@ -97,40 +98,91 @@ class DatabaseWrapper(BaseDatabaseWrapper):
}
_connection_pools = {}

def _isnull_operator(a, b):
is_null = {
def _isnull_operator_expr(field, is_null):
is_null_expr = {
"$or": [
# The path does not exist (i.e. is "missing")
{"$eq": [{"$type": a}, "missing"]},
{"$eq": [{"$type": field}, "missing"]},
# or the value is None.
{"$eq": [a, None]},
{"$eq": [field, None]},
]
}
return is_null if b else {"$not": is_null}
return is_null_expr if is_null else {"$not": is_null_expr}

mongo_operators = {
mongo_expr_operators = {
"exact": lambda a, b: {"$eq": [a, b]},
"gt": lambda a, b: {"$gt": [a, b]},
"gte": lambda a, b: {"$gte": [a, b]},
# MongoDB considers null less than zero. Exclude null values to match
# SQL behavior.
"lt": lambda a, b: {"$and": [{"$lt": [a, b]}, DatabaseWrapper._isnull_operator(a, False)]},
"lt": lambda a, b: {
"$and": [{"$lt": [a, b]}, DatabaseWrapper._isnull_operator_expr(a, False)]
},
"lte": lambda a, b: {
"$and": [{"$lte": [a, b]}, DatabaseWrapper._isnull_operator(a, False)]
"$and": [{"$lte": [a, b]}, DatabaseWrapper._isnull_operator_expr(a, False)]
},
"in": lambda a, b: {"$in": [a, b]},
"isnull": _isnull_operator,
"in": lambda a, b: {"$in": (a, b)},
"isnull": _isnull_operator_expr,
"range": lambda a, b: {
"$and": [
{"$or": [DatabaseWrapper._isnull_operator(b[0], True), {"$gte": [a, b[0]]}]},
{"$or": [DatabaseWrapper._isnull_operator(b[1], True), {"$lte": [a, b[1]]}]},
{"$or": [DatabaseWrapper._isnull_operator_expr(b[0], True), {"$gte": [a, b[0]]}]},
{"$or": [DatabaseWrapper._isnull_operator_expr(b[1], True), {"$lte": [a, b[1]]}]},
]
},
"iexact": lambda a, b: regex_match(a, ("^", b, {"$literal": "$"}), insensitive=True),
"startswith": lambda a, b: regex_match(a, ("^", b)),
"istartswith": lambda a, b: regex_match(a, ("^", b), insensitive=True),
"endswith": lambda a, b: regex_match(a, (b, {"$literal": "$"})),
"iendswith": lambda a, b: regex_match(a, (b, {"$literal": "$"}), insensitive=True),
"iexact": lambda a, b: regex_expr(a, ("^", b, {"$literal": "$"}), insensitive=True),
"startswith": lambda a, b: regex_expr(a, ("^", b)),
"istartswith": lambda a, b: regex_expr(a, ("^", b), insensitive=True),
"endswith": lambda a, b: regex_expr(a, (b, {"$literal": "$"})),
"iendswith": lambda a, b: regex_expr(a, (b, {"$literal": "$"}), insensitive=True),
"contains": lambda a, b: regex_expr(a, b),
"icontains": lambda a, b: regex_expr(a, b, insensitive=True),
"regex": lambda a, b: regex_expr(a, b),
"iregex": lambda a, b: regex_expr(a, b, insensitive=True),
}

def range_match(a, b):
conditions = []
start, end = b
if start is not None:
conditions.append({a: {"$gte": b[0]}})
if end is not None:
conditions.append({a: {"$lte": b[1]}})
if not conditions:
raise FullResultSet
if start is not None and end is not None:
if isinstance(start, Decimal128):
start = start.to_decimal()
Collaborator Author
Potential overflow risk here?

Collaborator
Is there a failing test without the to_decimal() calls? I'm not sure why there would be a mix of Decimal/Decimal128 here.

Contributor
This is a BSON type stored in the database and returned locally, so I think it should be fine.

Collaborator Author
Yes, there is: model_fields_.test_polymorphic_embedded_model.QueryingTests.test_range. Maybe this case shouldn't be handled here; we could let the query optimizer remove those False cases instead.
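
For context, a minimal illustration of why the to_decimal() conversions above are needed, assuming pymongo's bson.Decimal128 (which implements equality but not ordering comparisons); the bound values are made up:

```python
from bson import Decimal128

start, end = Decimal128("10.5"), Decimal128("2.0")  # hypothetical range bounds

# Decimal128 supports == / != but not < / >, so comparing the raw BSON values
# for the start > end check raises a TypeError.
try:
    start > end
except TypeError as exc:
    print(exc)  # e.g. '>' not supported between instances of 'Decimal128' and 'Decimal128'

# Converting to decimal.Decimal first makes the ordering check work.
assert start.to_decimal() > end.to_decimal()
```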

if isinstance(end, Decimal128):
end = end.to_decimal()
if start > end:
raise EmptyResultSet
return {"$and": conditions}

def _isnull_operator_match(field, is_null):
if is_null:
return {"$or": [{field: {"$exists": False}}, {field: None}]}
return {"$and": [{field: {"$exists": True}}, {field: {"$ne": None}}]}

mongo_operators = {
"exact": lambda a, b: {a: b},
"gt": lambda a, b: {a: {"$gt": b}},
"gte": lambda a, b: {a: {"$gte": b}},
# MongoDB considers null less than zero. Exclude null values to match
# SQL behavior.
"lt": lambda a, b: {
"$and": [{a: {"$lt": b}}, DatabaseWrapper._isnull_operator_match(a, False)]
},
"lte": lambda a, b: {
"$and": [{a: {"$lte": b}}, DatabaseWrapper._isnull_operator_match(a, False)]
},
"in": lambda a, b: {a: {"$in": tuple(b)}},
"isnull": _isnull_operator_match,
"range": range_match,
"iexact": lambda a, b: regex_match(a, f"^{b}$", insensitive=True),
"startswith": lambda a, b: regex_match(a, f"^{b}"),
"istartswith": lambda a, b: regex_match(a, f"^{b}", insensitive=True),
"endswith": lambda a, b: regex_match(a, f"{b}$"),
"iendswith": lambda a, b: regex_match(a, f"{b}$", insensitive=True),
"contains": lambda a, b: regex_match(a, b),
"icontains": lambda a, b: regex_match(a, b, insensitive=True),
"regex": lambda a, b: regex_match(a, b),
32 changes: 17 additions & 15 deletions django_mongodb_backend/compiler.py
@@ -69,12 +69,14 @@ def _get_replace_expr(self, sub_expr, group, alias):
if getattr(sub_expr, "distinct", False):
# If the expression should return distinct values, use $addToSet to
# deduplicate.
rhs = sub_expr.as_mql(self, self.connection, resolve_inner_expression=True)
rhs = sub_expr.as_mql(
self, self.connection, resolve_inner_expression=True, as_expr=True
)
group[alias] = {"$addToSet": rhs}
replacing_expr = sub_expr.copy()
replacing_expr.set_source_expressions([inner_column, None])
else:
group[alias] = sub_expr.as_mql(self, self.connection)
group[alias] = sub_expr.as_mql(self, self.connection, as_expr=True)
replacing_expr = inner_column
# Count must return 0 rather than null.
if isinstance(sub_expr, Count):
@@ -302,9 +304,7 @@ def _compound_searches_queries(self, search_replacements):
search.as_mql(self, self.connection),
{
"$addFields": {
result_col.as_mql(self, self.connection, as_path=True): {
"$meta": score_function
}
result_col.as_mql(self, self.connection): {"$meta": score_function}
}
},
]
@@ -334,7 +334,7 @@ def pre_sql_setup(self, with_col_aliases=False):
pipeline.extend(query.get_pipeline())
# Remove the added subqueries.
self.subqueries = []
pipeline.append({"$match": {"$expr": having}})
pipeline.append({"$match": having})
self.aggregation_pipeline = pipeline
self.annotations = {
target: expr.replace_expressions(all_replacements)
@@ -481,11 +481,11 @@ def build_query(self, columns=None):
query.lookup_pipeline = self.get_lookup_pipeline()
where = self.get_where()
try:
expr = where.as_mql(self, self.connection) if where else {}
match = where.as_mql(self, self.connection) if where else {}
except FullResultSet:
query.match_mql = {}
else:
query.match_mql = {"$expr": expr}
query.match_mql = match
if extra_fields:
query.extra_fields = self.get_project_fields(extra_fields, force_expression=True)
query.subqueries = self.subqueries
@@ -643,7 +643,9 @@ def get_combinator_queries(self):
for alias, expr in self.columns:
# Unfold foreign fields.
if isinstance(expr, Col) and expr.alias != self.collection_name:
ids[expr.alias][expr.target.column] = expr.as_mql(self, self.connection)
ids[expr.alias][expr.target.column] = expr.as_mql(
self, self.connection, as_expr=True
)
else:
ids[alias] = f"${alias}"
# Convert defaultdict to dict so it doesn't appear as
@@ -707,16 +709,16 @@ def get_project_fields(self, columns=None, ordering=None, force_expression=False
# For brevity/simplicity, project {"field_name": 1}
# instead of {"field_name": "$field_name"}.
if isinstance(expr, Col) and name == expr.target.column and not force_expression
else expr.as_mql(self, self.connection)
else expr.as_mql(self, self.connection, as_expr=True)
)
except EmptyResultSet:
empty_result_set_value = getattr(expr, "empty_result_set_value", NotImplemented)
value = (
False if empty_result_set_value is NotImplemented else empty_result_set_value
)
fields[collection][name] = Value(value).as_mql(self, self.connection)
fields[collection][name] = Value(value).as_mql(self, self.connection, as_expr=True)
except FullResultSet:
fields[collection][name] = Value(True).as_mql(self, self.connection)
fields[collection][name] = Value(True).as_mql(self, self.connection, as_expr=True)
# Annotations (stored in None) and the main collection's fields
# should appear in the top-level of the fields dict.
fields.update(fields.pop(None, {}))
@@ -739,10 +741,10 @@ def _get_ordering(self):
idx = itertools.count(start=1)
for order in self.order_by_objs or []:
if isinstance(order.expression, Col):
field_name = order.as_mql(self, self.connection).removeprefix("$")
field_name = order.as_mql(self, self.connection, as_expr=True).removeprefix("$")
fields.append((order.expression.target.column, order.expression))
elif isinstance(order.expression, Ref):
field_name = order.as_mql(self, self.connection).removeprefix("$")
field_name = order.as_mql(self, self.connection, as_expr=True).removeprefix("$")
else:
field_name = f"__order{next(idx)}"
fields.append((field_name, order.expression))
@@ -879,7 +881,7 @@ def execute_sql(self, result_type):
)
prepared = field.get_db_prep_save(value, connection=self.connection)
if hasattr(value, "as_mql"):
prepared = prepared.as_mql(self, self.connection)
prepared = prepared.as_mql(self, self.connection, as_expr=True)
values[field.column] = prepared
try:
criteria = self.build_query().match_mql