Skip to content

INTPYTHON-483 Add querying support for EmbeddedModelArrayField #303

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion django_mongodb_backend/fields/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ class ArrayLenTransform(Transform):

def as_mql(self, compiler, connection):
    """
    Return MQL that evaluates to the number of elements in the left-hand
    side, or to None when the left-hand side is not an array.
    """
    lhs_mql = process_lhs(self, compiler, connection)
    # Guard with $isArray (instead of comparing against None) so that $size
    # is only ever applied to an array value.
    return {"$cond": {"if": {"$isArray": lhs_mql}, "then": {"$size": lhs_mql}, "else": None}}


@ArrayField.register_lookup
Expand Down
5 changes: 3 additions & 2 deletions django_mongodb_backend/fields/embedded_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,9 @@ def as_mql(self, compiler, connection):
key_transforms.insert(0, previous.key_name)
previous = previous.lhs
mql = previous.as_mql(compiler, connection)
transforms = ".".join(key_transforms)
return f"{mql}.{transforms}"
for key in key_transforms:
mql = {"$getField": {"input": mql, "field": key}}
return mql

@property
def output_field(self):
Expand Down
206 changes: 204 additions & 2 deletions django_mongodb_backend/fields/embedded_model_array.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from django.db.models import Field
import difflib

from django.core.exceptions import FieldDoesNotExist
from django.db.models import Field, lookups
from django.db.models.expressions import Col
from django.db.models.fields.related import lazy_related_operation
from django.db.models.lookups import Lookup, Transform

from .. import forms
from ..query_utils import process_lhs, process_rhs
from . import EmbeddedModelField
from .array import ArrayField
from .array import ArrayField, ArrayLenTransform


class EmbeddedModelArrayField(ArrayField):
Expand Down Expand Up @@ -56,3 +62,199 @@ def formfield(self, **kwargs):
**kwargs,
},
)

def get_transform(self, name):
    """
    Return the transform the parent class resolves for `name`; when there
    is none, return a KeyTransformFactory bound to `name` and this field.
    """
    registered = super().get_transform(name)
    # Any name the parent class doesn't resolve is handed to
    # KeyTransformFactory together with this field.
    return registered or KeyTransformFactory(name, self)

def _get_lookup(self, lookup_name):
    """
    Resolve `lookup_name` through the parent class.

    A missing lookup (None) and ArrayLenTransform are returned unchanged.
    Anything else is replaced by a Lookup subclass whose as_mql() raises
    ValueError.
    """
    found = super()._get_lookup(lookup_name)
    if found is not None and found is not ArrayLenTransform:

        class EmbeddedModelArrayFieldLookups(Lookup):
            def as_mql(self, compiler, connection):
                raise ValueError(
                    "Lookups aren't supported on EmbeddedModelArrayField. "
                    "Try querying one of its embedded fields instead."
                )

        return EmbeddedModelArrayFieldLookups
    return found


class _EmbeddedModelArrayOutputField(ArrayField):
    """
    Output field used when an EmbeddedModelArrayField is traversed in a query
    path.

    This class is for query output resolution only and is not meant to be
    declared on models. The result of traversing an EmbeddedModelArrayField
    should behave like an array of the embedded model's target type.

    Lookups are inherited from ArrayField, but they are resolved with
    EmbeddedModelArrayField semantics, and only the names listed in
    ALLOWED_LOOKUPS are exposed.
    """

    # Names of the only lookups get_lookup() will resolve.
    ALLOWED_LOOKUPS = {"in", "exact", "iexact", "gt", "gte", "lt", "lte"}

    def get_lookup(self, name):
        """Return the inherited lookup for `name`, or None if it isn't allowed."""
        if name not in self.ALLOWED_LOOKUPS:
            return None
        return super().get_lookup(name)


class EmbeddedModelArrayFieldBuiltinLookup(Lookup):
    """
    Shared behavior for lookups applied to a field of the embedded models
    stored in an array.
    """

    def process_rhs(self, compiler, connection):
        """
        Return `(None, values)` where `values` is the right-hand side as a
        list of database-ready values.

        A non-iterable right-hand side is wrapped in a one-element list.
        Items that have an `as_mql` attribute are passed through untouched.
        """
        if self.get_db_prep_lookup_value_is_iterable:
            candidates = self.rhs
        else:
            candidates = [self.rhs]
        # Serialize with the output field of the queried target: the
        # subfield when a nested KeyTransform is involved, otherwise the
        # base field of the array.
        to_db = self.lhs._lhs.output_field.get_db_prep_value
        serialized = []
        for item in candidates:
            if hasattr(item, "as_mql"):
                serialized.append(item)
            else:
                serialized.append(to_db(item, connection, prepared=True))
        return None, serialized

    def as_mql(self, compiler, connection):
        """
        Return an `$anyElementTrue` expression over the mapped array.

        The left-hand side is a `$map` over the array wrapped in `$ifNull`.
        Its per-element expression is replaced by this lookup's operator
        applied to that expression and the right-hand side values, which
        replicates MongoDB's implicit ANY-match.
        """
        mapped = process_lhs(self, compiler, connection)
        map_stage = mapped["$ifNull"][0]["$map"]
        element_mql = map_stage["in"]
        rhs_values = process_rhs(self, compiler, connection)
        operator = connection.mongo_operators[self.lookup_name]
        # Swap the plain element expression for the per-element comparison.
        map_stage["in"] = operator(element_mql, rhs_values)
        return {"$anyElementTrue": mapped}


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldIn(EmbeddedModelArrayFieldBuiltinLookup, lookups.In):
    """`in` lookup on a field of the embedded models in an array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldExact(EmbeddedModelArrayFieldBuiltinLookup, lookups.Exact):
    """`exact` lookup on a field of the embedded models in an array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldIExact(EmbeddedModelArrayFieldBuiltinLookup, lookups.IExact):
    """`iexact` lookup on a field of the embedded models in an array."""

    # The right-hand side is a single value, so the shared process_rhs()
    # wraps it in a list before serializing it.
    get_db_prep_lookup_value_is_iterable = False


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldGreaterThan(EmbeddedModelArrayFieldBuiltinLookup, lookups.GreaterThan):
    """`gt` lookup on a field of the embedded models in an array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldGreaterThanOrEqual(
    EmbeddedModelArrayFieldBuiltinLookup, lookups.GreaterThanOrEqual
):
    """`gte` lookup on a field of the embedded models in an array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldLessThan(EmbeddedModelArrayFieldBuiltinLookup, lookups.LessThan):
    """`lt` lookup on a field of the embedded models in an array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldLessThanOrEqual(
    EmbeddedModelArrayFieldBuiltinLookup, lookups.LessThanOrEqual
):
    """`lte` lookup on a field of the embedded models in an array."""


class KeyTransform(Transform):
    """
    Transform that targets the field `key_name` of every embedded model
    stored in an EmbeddedModelArrayField.
    """

    def __init__(self, key_name, array_field, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.array_field = array_field
        self.key_name = key_name
        # Lookups iterate over the array of embedded models, so each element
        # is represented by a virtual column of the queried field's type.
        embedded_meta = array_field.base_field.embedded_model._meta
        item_field = embedded_meta.get_field(key_name).clone()
        item_path = f"$item.{key_name}"
        item_field.db_column = item_path
        item_field.set_attributes_from_name(item_path)
        self._lhs = Col(None, item_field)
        self._sub_transform = None

    def __call__(self, this, *args, **kwargs):
        """
        Wrap the per-element expression in the pending sub-transform and
        return this same object. `this` is accepted but not used.
        """
        wrapped = self._sub_transform(self._lhs, *args, **kwargs)
        self._lhs = wrapped
        return self

    def get_lookup(self, name):
        """Resolve the lookup `name` through this transform's output field."""
        field = self.output_field
        return field.get_lookup(name)

    def get_transform(self, name):
        """
        Validate that `name` is either a field of an embedded model or an
        allowed lookup on an embedded model's field.

        Raise ValueError if `name` would traverse a second level of embedded
        arrays, and FieldDoesNotExist if `name` can't be resolved at all.
        """
        # Once the per-element expression is a transform, all the filters are
        # applied over it. Otherwise the transform comes from the nested
        # embedded model field.
        sub_transform = self._lhs.get_transform(name)
        if sub_transform:
            if isinstance(sub_transform, KeyTransformFactory):
                raise ValueError("Cannot perform multiple levels of array traversal in a query.")
            self._sub_transform = sub_transform
            return self
        output_field = self._lhs.output_field
        # A lookup is only suggested if it is allowed AND valid for the field.
        candidates = self.output_field.ALLOWED_LOOKUPS.intersection(
            set(output_field.get_lookups())
        )
        close_matches = difflib.get_close_matches(name, candidates)
        suggestion = ""
        if close_matches:
            joined = " or ".join(close_matches)
            suggestion = f", perhaps you meant {joined}?"
        raise FieldDoesNotExist(
            f"Unsupported lookup '{name}' for "
            f"EmbeddedModelArrayField of '{output_field.__class__.__name__}'"
            f"{suggestion}"
        )

    def as_mql(self, compiler, connection):
        """
        Return a `$map` over the array that evaluates the per-element
        expression for each item, falling back to an empty list through
        `$ifNull`.
        """
        item_mql = self._lhs.as_mql(compiler, connection)
        array_mql = process_lhs(self, compiler, connection)
        mapped = {"$map": {"input": array_mql, "as": "item", "in": item_mql}}
        return {"$ifNull": [mapped, []]}

    @property
    def output_field(self):
        """An array output field wrapping the per-element output field."""
        element_field = self._lhs.output_field
        return _EmbeddedModelArrayOutputField(element_field)


class KeyTransformFactory:
    """
    Callable that remembers a key name and a field, and builds a KeyTransform
    from them when invoked.
    """

    def __init__(self, key_name, base_field):
        self.key_name, self.base_field = key_name, base_field

    def __call__(self, *args, **kwargs):
        """Build a KeyTransform, forwarding any extra arguments to it."""
        key, field = self.key_name, self.base_field
        return KeyTransform(key, field, *args, **kwargs)
10 changes: 5 additions & 5 deletions docs/source/ref/models/fields.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ We will use the following example model::
def __str__(self):
return self.name

.. fieldlookup:: arrayfield.contains
.. fieldlookup:: mongo-arrayfield.contains

``contains``
^^^^^^^^^^^^
Expand Down Expand Up @@ -134,7 +134,7 @@ passed. It uses the ``$setIntersection`` operator. For example:
>>> Post.objects.filter(tags__contained_by=["thoughts", "django", "tutorial"])
<QuerySet [<Post: First post>, <Post: Second post>, <Post: Third post>]>

.. fieldlookup:: arrayfield.overlap
.. fieldlookup:: mongo-arrayfield.overlap

``overlap``
~~~~~~~~~~~
Expand All @@ -154,7 +154,7 @@ uses the ``$setIntersection`` operator. For example:
>>> Post.objects.filter(tags__overlap=["thoughts", "tutorial"])
<QuerySet [<Post: First post>, <Post: Second post>, <Post: Third post>]>

.. fieldlookup:: arrayfield.len
.. fieldlookup:: mongo-arrayfield.len

``len``
^^^^^^^
Expand All @@ -170,7 +170,7 @@ available for :class:`~django.db.models.IntegerField`. For example:
>>> Post.objects.filter(tags__len=1)
<QuerySet [<Post: Second post>]>

.. fieldlookup:: arrayfield.index
.. fieldlookup:: mongo-arrayfield.index

Index transforms
^^^^^^^^^^^^^^^^
Expand All @@ -196,7 +196,7 @@ array. The lookups available after the transform are those from the

These indexes use 0-based indexing.

.. fieldlookup:: arrayfield.slice
.. fieldlookup:: mongo-arrayfield.slice

Slice transforms
^^^^^^^^^^^^^^^^
Expand Down
66 changes: 66 additions & 0 deletions docs/source/topics/embedded-models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,69 @@ Represented in BSON, the post's structure looks like this:
name: 'Hello world!',
tags: [ { name: 'welcome' }, { name: 'test' } ]
}

Querying ``EmbeddedModelArrayField``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

You can query into an embedded model array using the same double underscore
syntax as relational fields. For example, to find posts that have a tag with
name "test"::

>>> Post.objects.filter(tags__name="test")

There is a limited set of lookups you can chain after an embedded field:

* :lookup:`exact`, :lookup:`iexact`
* :lookup:`in`
* :lookup:`gt`, :lookup:`gte`, :lookup:`lt`, :lookup:`lte`

For example, to find posts that have tags with name "test", "TEST", "tEsT",
etc::

>>> Post.objects.filter(tags__name__iexact="test")

.. fieldlookup:: embeddedmodelarrayfield.len

``len`` transform
^^^^^^^^^^^^^^^^^

You can use the ``len`` transform to filter on the length of the array. The
lookups available afterward are those available for
:class:`~django.db.models.IntegerField`. For example, to match posts with one
tag::

>>> Post.objects.filter(tags__len=1)

or at least one tag::

>>> Post.objects.filter(tags__len__gte=1)

Index and slice transforms
^^^^^^^^^^^^^^^^^^^^^^^^^^

Like :class:`~django_mongodb_backend.fields.ArrayField`, you can use
:lookup:`index <mongo-arrayfield.index>` and :lookup:`slice
<mongo-arrayfield.slice>` transforms to filter on particular items in an array.

For example, to find posts where the first tag is named "test"::

>>> Post.objects.filter(tags__0__name="test")

Or to find posts where one of the first two tags is named "test"::

>>> Post.objects.filter(tags__0_1__name="test")

These indexes use 0-based indexing.

Nested ``EmbeddedModelArrayField``\s
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If your models use nested ``EmbeddedModelArrayField``\s, you can't use double
underscores to query into the second level.

For example, if the ``Tag`` model had an ``EmbeddedModelArrayField`` called
``colors``:

>>> Post.objects.filter(tags__colors__name="blue")
...
ValueError: Cannot perform multiple levels of array traversal in a query.
Loading