-
Notifications
You must be signed in to change notification settings - Fork 21
INTPYTHON-527 Add Queryable Encryption support #329
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
bc52c8e
38fb110
65bd15a
e08945b
7b34b44
8e83ada
4da895c
ed54a9b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# Queryable Encryption helpers | ||
# | ||
# TODO: Decide if these helpers should even exist, and if so, find a permanent | ||
# place for them. | ||
|
||
from bson.binary import STANDARD | ||
from bson.codec_options import CodecOptions | ||
from pymongo.encryption import AutoEncryptionOpts, ClientEncryption | ||
|
||
|
||
def get_encrypted_client(auto_encryption_opts, encrypted_connection):
    """
    Build a `ClientEncryption` instance for MongoDB Client-Side Field Level
    Encryption (CSFLE), suitable for creating an encrypted collection.

    The KMS providers and key vault namespace are read from
    `auto_encryption_opts`; `encrypted_connection` is handed to
    `ClientEncryption` as its third positional argument.
    """
    # NOTE: `_kms_providers` and `_key_vault_namespace` are private attributes
    # of the options object; no public accessor is used here.
    return ClientEncryption(
        auto_encryption_opts._kms_providers,
        auto_encryption_opts._key_vault_namespace,
        encrypted_connection,
        CodecOptions(uuid_representation=STANDARD),
    )
|
||
|
||
def get_auto_encryption_opts(
    crypt_shared_lib_path=None,
    kms_providers=None,
    key_vault_namespace="encryption.__keyVault",
):
    """
    Return an `AutoEncryptionOpts` instance for MongoDB Client-Side Field
    Level Encryption (CSFLE) that can be used to create an encrypted connection.

    Args:
        crypt_shared_lib_path: Forwarded unchanged to `AutoEncryptionOpts`
            as `crypt_shared_lib_path`.
        kms_providers: Forwarded unchanged to `AutoEncryptionOpts` as
            `kms_providers`.
        key_vault_namespace: "<database>.<collection>" namespace of the key
            vault. Defaults to "encryption.__keyVault", the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        AutoEncryptionOpts: The configured options object.
    """
    # Review note (via @ShaneHarvey): this should be documented for users.
    return AutoEncryptionOpts(
        key_vault_namespace=key_vault_namespace,
        kms_providers=kms_providers,
        crypt_shared_lib_path=crypt_shared_lib_path,
    )
|
||
|
||
def get_customer_master_key():
    """
    Return the fixed 96-byte local master key used with MongoDB Client-Side
    Field Level Encryption (CSFLE).

    For local testing purposes only. In production, use a secure KMS like
    AWS, Azure, GCP, or KMIP.

    Returns:
        bytes: A 96-byte key.
    """
    # WARNING: This is a static key for testing only.
    # A real key can be generated with: os.urandom(96)
    hex_rows = (
        "000102030405060708090a0b0c0d0e0f",
        "101112131415161718191a1b1c1d1e1f",
        "202122232425262728292a2b2c2d2e2f",
        "303132333435363738393a3b3c3d3e3f",
        "404142434445464748494a4b4c4d4e4f",
        "505152535455565758595a5b5c5d5e5f",
    )
    return bytes.fromhex("".join(hex_rows))
|
||
|
||
def get_kms_providers():
    """
    Return the KMS provider configuration for MongoDB Client-Side Field Level
    Encryption (CSFLE).

    Only the "local" provider is configured; its key comes from
    `get_customer_master_key()`.
    """
    local_provider = {"key": get_customer_master_key()}
    return {"local": local_provider}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from django.db import models | ||
|
||
|
||
class EncryptedCharField(models.CharField):
    """
    A `CharField` flagged as encrypted.

    The flag is a class-level attribute rather than something assigned in
    `__init__`: it is constant for every instance, so no constructor override
    is needed and the marker is also visible on the class itself.
    """

    # Looked up with `getattr(field, "encrypted", False)` when the encrypted
    # fields map of a model is built.
    encrypted = True
Review comment: I'd think this could be a class-level variable. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,3 +14,26 @@ def delete(self, *args, **kwargs): | |
|
||
def save(self, *args, **kwargs): | ||
raise NotSupportedError("EmbeddedModels cannot be saved.") | ||
|
||
|
||
class EncryptedModelBase(models.base.ModelBase):
    """Metaclass that records which fields of a model are flagged encrypted."""

    def __new__(cls, name, bases, attrs, **kwargs):
        model_cls = super().__new__(cls, name, bases, attrs, **kwargs)

        # Map the name of every field flagged `encrypted` to the name of its
        # field class.
        # TODO (from review): add query conditions.
        flagged = {}
        for field in model_cls._meta.fields:
            if getattr(field, "encrypted", False):
                flagged[field.name] = field.__class__.__name__

        # Expose the map on the class, which makes it reachable from
        # instances as well.
        model_cls.encrypted_fields_map = {"fields": flagged}
        return model_cls
|
||
|
||
class EncryptedModel(models.Model, metaclass=EncryptedModelBase):
    """
    Abstract base model whose subclasses get an `encrypted_fields_map`
    attribute from the `EncryptedModelBase` metaclass.
    """

    class Meta:
        abstract = True
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,13 @@ | ||
import contextlib | ||
|
||
from django.db.backends.base.schema import BaseDatabaseSchemaEditor | ||
from django.db.models import Index, UniqueConstraint | ||
from pymongo.encryption import EncryptedCollectionError | ||
from pymongo.operations import SearchIndexModel | ||
|
||
from django_mongodb_backend.indexes import SearchIndex | ||
|
||
from .encryption import get_encrypted_client | ||
from .fields import EmbeddedModelField | ||
from .indexes import SearchIndex | ||
from .query import wrap_database_errors | ||
from .utils import OperationCollector | ||
|
||
|
@@ -41,7 +44,7 @@ def get_database(self): | |
@wrap_database_errors | ||
@ignore_embedded_models | ||
def create_model(self, model): | ||
self.get_database().create_collection(model._meta.db_table) | ||
self._create_collection(model) | ||
self._create_model_indexes(model) | ||
# Make implicit M2M tables. | ||
for field in model._meta.local_many_to_many: | ||
|
@@ -418,3 +421,45 @@ def _field_should_have_unique(self, field): | |
db_type = field.db_type(self.connection) | ||
# The _id column is automatically unique. | ||
return db_type and field.unique and field.column != "_id" | ||
|
||
def _supports_encryption(self, model): | ||
""" | ||
Check for `supports_encryption` feature and `auto_encryption_opts` | ||
and `embedded_fields_map`. If `supports_encryption` is True and | ||
`auto_encryption_opts` is in the cached connection settings and | ||
the model has an embedded_fields_map property, then encryption | ||
is supported. | ||
""" | ||
return ( | ||
self.connection.features.supports_encryption | ||
and self.connection._settings_dict.get("OPTIONS", {}).get("auto_encryption_opts") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we want encrypted models to silently fallback to working as unencrypted models. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No we don't but I'm not sure why you are making that comment here … as of 65bd15a I'm creating two connections and using the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In |
||
and hasattr(model, "encrypted_fields_map") | ||
) | ||
|
||
def _create_collection(self, model):
    """
    Create the collection backing `model`.

    If `_supports_encryption(model)` is true, open a new connection from the
    cached settings, wrap it in an encrypted client via
    `get_encrypted_client`, and use that to create an encrypted collection
    from the model's `encrypted_fields_map`. Otherwise create a regular
    collection.

    Raises:
        EncryptedCollectionError: If the encrypted collection cannot be
            created. This is deliberately not suppressed (see below).
    """
    if not self._supports_encryption(model):
        self.get_database().create_collection(model._meta.db_table)
        return

    settings_dict = self.connection._settings_dict
    auto_encryption_opts = settings_dict.get("OPTIONS", {}).get("auto_encryption_opts")
    # Use the cached settings dict to create a new, dedicated connection.
    encrypted_connection = self.connection.get_new_connection(settings_dict)
    try:
        # The encrypted client is used as a context manager so it is closed
        # once the collection has been created.
        with get_encrypted_client(auto_encryption_opts, encrypted_connection) as encrypted_client:
            # Per review: there shouldn't be a case where we try to create a
            # collection that already exists. Surfacing the error is correct,
            # because it means the user's migrations are out of sync with
            # their database.
            encrypted_client.create_encrypted_collection(
                encrypted_connection[self.connection.database.name],
                model._meta.db_table,
                model.encrypted_fields_map,
                "local",  # TODO: KMS provider should be configurable
            )
    finally:
        # The connection was opened only for this operation; close it so it
        # is not leaked. Assumes get_new_connection() returns a client
        # exposing close() (e.g. a pymongo MongoClient) — TODO confirm.
        encrypted_connection.close()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Encrypted models | ||
================ | ||
|
||
``EncryptedCharField`` | ||
---------------------- | ||
|
||
The basics | ||
~~~~~~~~~~ | ||
|
||
Let's consider this example:: | ||
|
||
from django.db import models | ||
|
||
from django_mongodb_backend.fields import EncryptedCharField | ||
from django_mongodb_backend.models import EncryptedModel | ||
|
||
|
||
class Person(EncryptedModel): | ||
ssn = EncryptedCharField("ssn", max_length=11) | ||
|
||
def __str__(self): | ||
return self.ssn |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,4 +10,5 @@ know: | |
|
||
cache | ||
embedded-models | ||
encrypted-models | ||
known-issues |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from django.db import models | ||
|
||
from django_mongodb_backend.fields import EncryptedCharField | ||
from django_mongodb_backend.models import EncryptedModel | ||
|
||
|
||
class Person(EncryptedModel):
    """Test model with one encrypted field (`ssn`) and one plain field (`name`)."""

    # Encrypted field: expected to appear in `encrypted_fields_map`.
    ssn = EncryptedCharField("ssn", max_length=11)
    # Plain field: expected to be absent from `encrypted_fields_map`.
    name = models.CharField("name", max_length=100)

    def __str__(self):
        return self.ssn
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from django.test import TestCase | ||
|
||
from .models import Person | ||
|
||
|
||
class EncryptedModelTests(TestCase):
    """Checks for the `encrypted_fields_map` attribute of `Person`."""

    # The map both the class and its instances are expected to expose.
    expected_map = {"fields": {"ssn": "EncryptedCharField"}}

    @classmethod
    def setUpTestData(cls):
        cls.objs = [Person.objects.create()]

    def test_encrypted_fields_map_on_class(self):
        self.assertEqual(Person.encrypted_fields_map, self.expected_map)

    def test_encrypted_fields_map_on_instance(self):
        person = Person(ssn="123-45-6789")
        self.assertEqual(person.encrypted_fields_map, self.expected_map)

    def test_non_encrypted_fields_not_included(self):
        field_names = Person.encrypted_fields_map.get("fields").keys()
        self.assertNotIn("name", field_names)
Uh oh!
There was an error while loading. Please reload this page.