From 0e8bdf65e6991fec0a7de599675ac70316d60876 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Mon, 23 Jun 2025 10:04:45 -0400 Subject: [PATCH 1/5] BSONVector - updated treatment of ignored bits to match spec. --- bson/binary.py | 11 +++++++++++ test/bson_binary_vector/packed_bit.json | 9 +++++++++ test/test_bson.py | 17 ++++++++++++----- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/bson/binary.py b/bson/binary.py index a1f63adf27..19b727cc5a 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -14,6 +14,7 @@ from __future__ import annotations import struct +import warnings from enum import Enum from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union, overload from uuid import UUID @@ -471,6 +472,10 @@ def from_vector( metadata = struct.pack(" BinaryVector: @@ -522,6 +527,12 @@ def as_vector(self) -> BinaryVector: dtype_format = "B" format_string = f"<{n_values}{dtype_format}" unpacked_uint8s = list(struct.unpack_from(format_string, self, position)) + if padding and n_values and unpacked_uint8s[-1] & (1 << padding) - 1 != 0: + warnings.warn( + "Vector has a padding P, but bits in the final byte lower than P are non-zero. In the next major version, they must be zero.", + DeprecationWarning, + stacklevel=2, + ) return BinaryVector(unpacked_uint8s, dtype, padding) else: diff --git a/test/bson_binary_vector/packed_bit.json b/test/bson_binary_vector/packed_bit.json index 7cc272e38b..3015acba66 100644 --- a/test/bson_binary_vector/packed_bit.json +++ b/test/bson_binary_vector/packed_bit.json @@ -29,6 +29,15 @@ "padding": 3, "canonical_bson": "1600000005766563746F7200040000000910037F0800" }, + { + "description": "PACKED_BIT with inconsistent padding", + "valid": false, + "vector": [127, 7], + "dtype_hex": "0x10", + "dtype_alias": "PACKED_BIT", + "padding": 3, + "canonical_bson": "1600000005766563746F7200040000000910037F0700" + }, { "description": "Empty Vector PACKED_BIT", "valid": true, diff --git a/test/test_bson.py b/test/test_bson.py index 23e0a29c4f..7084e1a090 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -739,7 +739,7 @@ def test_vector(self): """Tests of subtype 9""" # We start with valid cases, across the 3 dtypes implemented. # Work with a simple vector that can be interpreted as int8, float32, or ubyte - list_vector = [127, 7] + list_vector = [127, 8] # As INT8, vector has length 2 binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8) vector = binary_vector.as_vector() @@ -764,18 +764,18 @@ def test_vector(self): uncompressed = "" for val in list_vector: uncompressed += format(val, "08b") - assert uncompressed[:-padding] == "0111111100000" + assert uncompressed[:-padding] == "0111111100001" # It is worthwhile explicitly showing the values encoded to BSON padded_doc = {"padded_vec": padded_vec} assert ( encode(padded_doc) - == b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00" + == b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00" ) # and dumped to json assert ( json_util.dumps(padded_doc) - == '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}' + == '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}' ) # FLOAT32 is also implemented @@ -791,8 +791,15 @@ def test_vector(self): else: self.fail("Failed to raise an exception.") - # Test form of Binary.from_vector(BinaryVector) + # Test one must pass zeros for all ignored bits + try: + Binary.from_vector([255], BinaryVectorDtype.PACKED_BIT, padding=7) + except Exception as exc: + self.assertIsInstance(exc, ValueError) + else: + self.fail("Failed to raise an exception.") + # Test form of Binary.from_vector(BinaryVector) assert padded_vec == Binary.from_vector( BinaryVector(list_vector, BinaryVectorDtype.PACKED_BIT, padding) ) From 34c0ecf98fe599746d99f622eb4f61f86eb0baa1 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Mon, 23 Jun 2025 16:20:27 -0400 Subject: [PATCH 2/5] Manually removed test case: PACKED_BIT with inconsistent padding. This will sync up with specifications when pull/1812 is merged. --- test/bson_binary_vector/packed_bit.json | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/bson_binary_vector/packed_bit.json b/test/bson_binary_vector/packed_bit.json index 3015acba66..7cc272e38b 100644 --- a/test/bson_binary_vector/packed_bit.json +++ b/test/bson_binary_vector/packed_bit.json @@ -29,15 +29,6 @@ "padding": 3, "canonical_bson": "1600000005766563746F7200040000000910037F0800" }, - { - "description": "PACKED_BIT with inconsistent padding", - "valid": false, - "vector": [127, 7], - "dtype_hex": "0x10", - "dtype_alias": "PACKED_BIT", - "padding": 3, - "canonical_bson": "1600000005766563746F7200040000000910037F0700" - }, { "description": "Empty Vector PACKED_BIT", "valid": true, From 8199493884c3918a4819eca665ae98d53610e500 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Mon, 23 Jun 2025 17:36:21 -0400 Subject: [PATCH 3/5] Updated changelog --- doc/changelog.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/changelog.rst b/doc/changelog.rst index ca4784f919..6363cd7bcb 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -51,6 +51,9 @@ PyMongo 4.13 brings a number of changes including: or the `migration guide `_ for more information. - Fixed a bug where :class:`pymongo.write_concern.WriteConcern` repr was not eval-able when using ``w="majority"``. +- Ignored bits in a BSON BinaryVector of PACKED_BIT dtype should be set to zero. + On writes, this is enforced and is a breaking change. + Reads from the database will not fail, however a warning will be triggered. Issues Resolved ............... From 093a6dc18da337398d82ef29a347b501757daf95 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 24 Jun 2025 10:20:37 -0400 Subject: [PATCH 4/5] Fix typing by adding BinaryVector.__len__ --- bson/binary.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bson/binary.py b/bson/binary.py index 19b727cc5a..38c383c277 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -256,6 +256,9 @@ def __eq__(self, other: Any) -> bool: self.dtype == other.dtype and self.padding == other.padding and self.data == other.data ) + def __len__(self) -> int: + return len(self.data) + class Binary(bytes): """Representation of BSON binary data. From d7e5f3be1dd9a17d0f4f37a95e4135cc6eb1409d Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 1 Jul 2025 14:56:43 -0700 Subject: [PATCH 5/5] Addresses comments. --- bson/binary.py | 2 +- test/test_bson.py | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/bson/binary.py b/bson/binary.py index 38c383c277..926263eee7 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -532,7 +532,7 @@ def as_vector(self) -> BinaryVector: unpacked_uint8s = list(struct.unpack_from(format_string, self, position)) if padding and n_values and unpacked_uint8s[-1] & (1 << padding) - 1 != 0: warnings.warn( - "Vector has a padding P, but bits in the final byte lower than P are non-zero. In the next major version, they must be zero.", + "Vector has a padding P, but bits in the final byte lower than P are non-zero. For pymongo>=5.0, they must be zero.", DeprecationWarning, stacklevel=2, ) diff --git a/test/test_bson.py b/test/test_bson.py index 7084e1a090..4d25d67ebe 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -784,20 +784,12 @@ def test_vector(self): # Now some invalid cases for x in [-1, 257]: - try: + with self.assertRaises(struct.error): Binary.from_vector([x], BinaryVectorDtype.PACKED_BIT) - except Exception as exc: - self.assertIsInstance(exc, struct.error) - else: - self.fail("Failed to raise an exception.") # Test one must pass zeros for all ignored bits - try: + with self.assertRaises(ValueError): Binary.from_vector([255], BinaryVectorDtype.PACKED_BIT, padding=7) - except Exception as exc: - self.assertIsInstance(exc, ValueError) - else: - self.fail("Failed to raise an exception.") # Test form of Binary.from_vector(BinaryVector) assert padded_vec == Binary.from_vector(