Skip to content

Add querying support to EmbeddedModelArrayField #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@ repos:
rev: v1.1.1
hooks:
- id: doc8
args: ["--ignore=D001"] # ignore line length
# D000 Invalid class attribute value for "class" directive when using
# * (keyword-only parameters separator).
# D001 line length
args: ["--ignore=D000,D001"]
stages: [manual]

- repo: https://github.com/sirosen/check-jsonschema
Expand Down
2 changes: 2 additions & 0 deletions django_mongodb_backend/fields/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
from .auto import ObjectIdAutoField
from .duration import register_duration_field
from .embedded_model import EmbeddedModelField
from .embedded_model_array import EmbeddedModelArrayField
from .json import register_json_field
from .objectid import ObjectIdField

__all__ = [
"register_fields",
"ArrayField",
"EmbeddedModelArrayField",
"EmbeddedModelField",
"ObjectIdAutoField",
"ObjectIdField",
Expand Down
2 changes: 1 addition & 1 deletion django_mongodb_backend/fields/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ class ArrayLenTransform(Transform):

def as_mql(self, compiler, connection):
lhs_mql = process_lhs(self, compiler, connection)
return {"$cond": {"if": {"$eq": [lhs_mql, None]}, "then": None, "else": {"$size": lhs_mql}}}
return {"$cond": {"if": {"$isArray": lhs_mql}, "then": {"$size": lhs_mql}, "else": None}}


@ArrayField.register_lookup
Expand Down
5 changes: 3 additions & 2 deletions django_mongodb_backend/fields/embedded_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,9 @@ def as_mql(self, compiler, connection):
key_transforms.insert(0, previous.key_name)
previous = previous.lhs
mql = previous.as_mql(compiler, connection)
transforms = ".".join(key_transforms)
return f"{mql}.{transforms}"
for key in key_transforms:
mql = {"$getField": {"input": mql, "field": key}}
return mql

@property
def output_field(self):
Expand Down
279 changes: 279 additions & 0 deletions django_mongodb_backend/fields/embedded_model_array.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
import difflib

from django.core.exceptions import FieldDoesNotExist
from django.db.models import Field, lookups
from django.db.models.expressions import Col
from django.db.models.lookups import Lookup, Transform

from .. import forms
from ..query_utils import process_lhs, process_rhs
from . import EmbeddedModelField
from .array import ArrayField, ArrayLenTransform


class EmbeddedModelArrayField(ArrayField):
    """
    ArrayField whose base field is an EmbeddedModelField of ``embedded_model``.

    The ``size`` option is rejected. Unknown transform names are resolved to
    a KeyTransformFactory, and every lookup other than ArrayLenTransform is
    replaced by one that raises when compiled to MQL.
    """

    def __init__(self, embedded_model, **kwargs):
        """Wrap ``embedded_model`` in an EmbeddedModelField and use it as base field.

        Raises ValueError if ``size`` is passed.
        """
        if "size" in kwargs:
            raise ValueError("EmbeddedModelArrayField does not support size.")
        element_field = EmbeddedModelField(embedded_model)
        super().__init__(element_field, **kwargs)
        self.embedded_model = embedded_model

    def deconstruct(self):
        """Return ``(name, path, args, kwargs)`` with ``embedded_model`` replacing ``base_field``."""
        name, path, args, kwargs = super().deconstruct()
        module_path = "django_mongodb_backend.fields.embedded_model_array.EmbeddedModelArrayField"
        if path == module_path:
            # Report the shorter import path exposed by the fields package.
            path = "django_mongodb_backend.fields.EmbeddedModelArrayField"
        kwargs["embedded_model"] = self.embedded_model
        del kwargs["base_field"]
        return name, path, args, kwargs

    def get_db_prep_value(self, value, connection, prepared=False):
        """Prepare a list/tuple of embedded model instances for the database.

        ``None`` is passed through unchanged; any other non-list, non-tuple
        value raises TypeError.
        """
        if value is None:
            return value
        if not isinstance(value, list | tuple):
            raise TypeError(
                f"Expected list of {self.embedded_model!r} instances, not {type(value)!r}."
            )
        # Must call get_db_prep_save() rather than get_db_prep_value()
        # to transform model instances to dicts.
        return [self.base_field.get_db_prep_save(item, connection) for item in value]

    def formfield(self, **kwargs):
        """Build the form field via Field.formfield(); ``kwargs`` override the defaults."""
        # ArrayField.formfield() is deliberately bypassed: it has some
        # differences, including an unneeded "base_field", and "max_length"
        # instead of "max_num".
        options = {
            "form_class": forms.EmbeddedModelArrayField,
            "model": self.embedded_model,
            "max_num": self.max_size,
            "prefix": self.name,
        }
        options.update(kwargs)
        return Field.formfield(self, **options)

    def get_transform(self, name):
        """Return the inherited transform for ``name``, else a KeyTransformFactory."""
        return super().get_transform(name) or KeyTransformFactory(name, self)

    def _get_lookup(self, lookup_name):
        """Return a raising stand-in for every inherited lookup except ArrayLenTransform."""
        found = super()._get_lookup(lookup_name)
        if found is None or found is ArrayLenTransform:
            return found

        class EmbeddedModelArrayFieldLookups(Lookup):
            """Placeholder lookup that fails when it is compiled to MQL."""

            def as_mql(self, compiler, connection):
                raise ValueError(
                    "Cannot apply this lookup directly to EmbeddedModelArrayField. "
                    "Try querying one of its embedded fields instead."
                )

        return EmbeddedModelArrayFieldLookups


class _EmbeddedModelArrayOutputField(ArrayField):
    """
    Output field used when an EmbeddedModelArrayField is traversed in a query path.

    Not intended for model definitions. It only exists so that query output
    resolution sees an array of the embedded model's target type when a field
    inside an EmbeddedModelArrayField is accessed.

    Lookups are resolved through ArrayField's machinery, but only the names
    listed in ALLOWED_LOOKUPS are exposed, and the lookups registered on this
    class follow EmbeddedModelArrayField semantics rather than native array
    behavior.
    """

    # Names of the lookups that may be applied to a traversed embedded field.
    ALLOWED_LOOKUPS = {
        "in",
        "exact",
        "iexact",
        "gt",
        "gte",
        "lt",
        "lte",
        "all",
        "contained_by",
    }

    def get_lookup(self, name):
        """Return the lookup for ``name``, or None when it is not in ALLOWED_LOOKUPS."""
        if name not in self.ALLOWED_LOOKUPS:
            return None
        return super().get_lookup(name)


class EmbeddedModelArrayFieldBuiltinLookup(Lookup):
def process_rhs(self, compiler, connection):
    """Return ``(None, values)`` where ``values`` is the prepared right-hand side.

    A non-iterable right-hand side is wrapped in a single-element list.
    Items that expose ``as_mql`` are kept as they are; every other item is
    passed through ``get_db_prep_value`` of the inner expression's output field.
    """
    candidates = self.rhs
    if not self.get_db_prep_lookup_value_is_iterable:
        candidates = [candidates]
    # Values are serialized by the output field of the expression wrapped by
    # the left-hand side (``self.lhs._lhs``), i.e. by the query target.
    # NOTE(review): presumably self.lhs is always a KeyTransform here - confirm.
    prep_value = self.lhs._lhs.output_field.get_db_prep_value
    serialized = []
    for candidate in candidates:
        if hasattr(candidate, "as_mql"):
            serialized.append(candidate)
        else:
            serialized.append(prep_value(candidate, connection, prepared=True))
    return None, serialized

def as_mql(self, compiler, connection):
# Querying a subfield within the array elements (via nested KeyTransform).
# Replicates MongoDB's implicit ANY-match by mapping over the array and applying
# `$in` on the subfield.
lhs_mql = process_lhs(self, compiler, connection)
inner_lhs_mql = lhs_mql["$ifNull"][0]["$map"]["in"]
values = process_rhs(self, compiler, connection)
lhs_mql["$ifNull"][0]["$map"]["in"] = connection.mongo_operators[self.lookup_name](
Copy link
Collaborator Author

@WaVEV WaVEV May 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now, I understand why Django returns template and parameters in as_sql results. 😬 I will think of another way to handle this.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Not what you encountered, but primarily it's to prevent SQL injection.)

inner_lhs_mql, values
)
return {"$anyElementTrue": lhs_mql}


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldIn(EmbeddedModelArrayFieldBuiltinLookup, lookups.In):
    """``In`` lookup for a field traversed inside an embedded model array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldExact(EmbeddedModelArrayFieldBuiltinLookup, lookups.Exact):
    """``Exact`` lookup for a field traversed inside an embedded model array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldIExact(EmbeddedModelArrayFieldBuiltinLookup, lookups.IExact):
    """``IExact`` lookup for a field traversed inside an embedded model array."""

    # The right-hand side is treated as a single value, not as an iterable.
    get_db_prep_lookup_value_is_iterable = False


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldGreaterThan(EmbeddedModelArrayFieldBuiltinLookup, lookups.GreaterThan):
    """``GreaterThan`` lookup for a field traversed inside an embedded model array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldGreaterThanOrEqual(
    EmbeddedModelArrayFieldBuiltinLookup, lookups.GreaterThanOrEqual
):
    """``GreaterThanOrEqual`` lookup for a field traversed inside an embedded model array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldLessThan(EmbeddedModelArrayFieldBuiltinLookup, lookups.LessThan):
    """``LessThan`` lookup for a field traversed inside an embedded model array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldLessThanOrEqual(
    EmbeddedModelArrayFieldBuiltinLookup, lookups.LessThanOrEqual
):
    """``LessThanOrEqual`` lookup for a field traversed inside an embedded model array."""


@_EmbeddedModelArrayOutputField.register_lookup
class EmbeddedModelArrayFieldAll(EmbeddedModelArrayFieldBuiltinLookup, Lookup):
    """``all`` lookup: the right-hand values must be a subset of the left-hand array."""

    lookup_name = "all"
    # The right-hand side is treated as a single value, not as an iterable.
    get_db_prep_lookup_value_is_iterable = False

    def as_mql(self, compiler, connection):
        """Return an ``$and`` of two null guards and a ``$setIsSubset`` check."""
        array_mql = process_lhs(self, compiler, connection)
        wanted = process_rhs(self, compiler, connection)
        lhs_not_null = {"$ne": [array_mql, None]}
        rhs_not_null = {"$ne": [wanted, None]}
        rhs_within_lhs = {"$setIsSubset": [wanted, array_mql]}
        return {"$and": [lhs_not_null, rhs_not_null, rhs_within_lhs]}


@_EmbeddedModelArrayOutputField.register_lookup
class ArrayContainedBy(EmbeddedModelArrayFieldBuiltinLookup, Lookup):
    """``contained_by`` lookup: the left-hand array must be a subset of the right-hand values."""

    lookup_name = "contained_by"
    # The right-hand side is treated as a single value, not as an iterable.
    get_db_prep_lookup_value_is_iterable = False

    def as_mql(self, compiler, connection):
        """Return an ``$and`` of two null guards and a ``$setIsSubset`` check."""
        array_mql = process_lhs(self, compiler, connection)
        container = process_rhs(self, compiler, connection)
        lhs_not_null = {"$ne": [array_mql, None]}
        rhs_not_null = {"$ne": [container, None]}
        lhs_within_rhs = {"$setIsSubset": [array_mql, container]}
        return {"$and": [lhs_not_null, rhs_not_null, lhs_within_rhs]}


class KeyTransform(Transform):
    """
    Transform that reaches the field ``key_name`` of every element of an
    embedded model array and exposes the result as an array of that field.
    """

    def __init__(self, key_name, array_field, *args, **kwargs):
        """Remember the target key and build the per-item column expression."""
        super().__init__(*args, **kwargs)
        self.array_field = array_field
        self.key_name = key_name
        # While mapping over the array each element is bound to "item", so the
        # inner expression is a synthetic column named "$item.<key_name>" that
        # is a clone of the embedded model's field (and so has its type).
        item_field = array_field.embedded_model._meta.get_field(key_name).clone()
        item_path = f"$item.{key_name}"
        item_field.db_column = item_path
        item_field.set_attributes_from_name(item_path)
        self._lhs = Col(None, item_field)
        self._sub_transform = None

    def __call__(self, this, *args, **kwargs):
        """Wrap the inner expression in the pending sub-transform and return self.

        ``this`` is accepted but not used.
        """
        wrapped = self._sub_transform(self._lhs, *args, **kwargs)
        self._lhs = wrapped
        return self

    def get_lookup(self, name):
        """Delegate lookup resolution to the array output field."""
        return self.output_field.get_lookup(name)

    def get_transform(self, name):
        """
        Validate that `name` is either a field of an embedded model or a
        lookup on an embedded model's field.

        Raises ValueError for a second level of array traversal and
        FieldDoesNotExist for anything the inner expression cannot resolve.
        """
        # When the inner expression knows the transform, remember it and hand
        # back this object; it is applied to the inner expression on __call__.
        inner_transform = self._lhs.get_transform(name)
        if inner_transform:
            if isinstance(inner_transform, KeyTransformFactory):
                raise ValueError("Cannot perform multiple levels of array traversal in a query.")
            self._sub_transform = inner_transform
            return self
        # Nothing matched: build an error that suggests close lookup names,
        # restricted to lookups that are both allowed and known to the field.
        inner_field = self._lhs.output_field
        known_lookups = set(inner_field.get_lookups())
        permitted = self.output_field.ALLOWED_LOOKUPS.intersection(known_lookups)
        close_matches = difflib.get_close_matches(name, permitted)
        hint = ""
        if close_matches:
            alternatives = " or ".join(close_matches)
            hint = f", perhaps you meant {alternatives}?"
        raise FieldDoesNotExist(
            f"Unsupported lookup '{name}' for "
            f"EmbeddedModelArrayField of '{inner_field.__class__.__name__}'"
            f"{hint}"
        )

    def as_mql(self, compiler, connection):
        """Return a ``$map`` of the inner expression over the array, or ``[]`` when null."""
        per_item_mql = self._lhs.as_mql(compiler, connection)
        array_mql = process_lhs(self, compiler, connection)
        mapped = {"$map": {"input": array_mql, "as": "item", "in": per_item_mql}}
        return {"$ifNull": [mapped, []]}

    @property
    def output_field(self):
        """A fresh _EmbeddedModelArrayOutputField wrapping the inner expression's output field."""
        return _EmbeddedModelArrayOutputField(self._lhs.output_field)


class KeyTransformFactory:
    """Callable that builds a KeyTransform for a fixed key name and array field."""

    def __init__(self, key_name, base_field):
        """Store the key name and the field handed to every KeyTransform created."""
        self.key_name = key_name
        self.base_field = base_field

    def __call__(self, *args, **kwargs):
        """Return ``KeyTransform(key_name, base_field, *args, **kwargs)``."""
        key_name, base_field = self.key_name, self.base_field
        return KeyTransform(key_name, base_field, *args, **kwargs)
2 changes: 2 additions & 0 deletions django_mongodb_backend/forms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .fields import (
EmbeddedModelArrayField,
EmbeddedModelField,
ObjectIdField,
SimpleArrayField,
Expand All @@ -7,6 +8,7 @@
)

__all__ = [
"EmbeddedModelArrayField",
"EmbeddedModelField",
"SimpleArrayField",
"SplitArrayField",
Expand Down
2 changes: 2 additions & 0 deletions django_mongodb_backend/forms/fields/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from .array import SimpleArrayField, SplitArrayField, SplitArrayWidget
from .embedded_model import EmbeddedModelField
from .embedded_model_array import EmbeddedModelArrayField
from .objectid import ObjectIdField

__all__ = [
"EmbeddedModelArrayField",
"EmbeddedModelField",
"SimpleArrayField",
"SplitArrayField",
Expand Down
Loading