Skip to content

Commit 67729e2

Browse files
committed
feat: basic BoolBuffer / BoolBufferMut
1 parent 0f93aa7 commit 67729e2

File tree

5 files changed

+618
-0
lines changed

5 files changed

+618
-0
lines changed

vortex-buffer/src/bit/arrow.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
//! Conversions between `BitBuffer` and Arrow's `BooleanBuffer`.
2+
3+
use arrow_buffer::BooleanBuffer;
4+
5+
use crate::{Alignment, BitBuffer, ByteBuffer};
6+
7+
impl From<BooleanBuffer> for BitBuffer {
8+
fn from(value: BooleanBuffer) -> Self {
9+
let offset = value.offset();
10+
let len = value.len();
11+
let buffer: arrow_buffer::Buffer = value.into_inner();
12+
let buffer = ByteBuffer::from_arrow_buffer(buffer, Alignment::of::<u8>());
13+
14+
BitBuffer::new_with_offset(buffer, len, offset)
15+
}
16+
}
17+
18+
impl From<BitBuffer> for BooleanBuffer {
19+
fn from(value: BitBuffer) -> Self {
20+
let offset = value.offset();
21+
let len = value.len();
22+
let buffer = value.into_inner();
23+
24+
BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len)
25+
}
26+
}
27+
28+
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};

    use crate::BitBuffer;

    /// Round-trips five `true` bits followed by five `false` bits through `BitBuffer`.
    #[test]
    fn test_from_arrow() {
        let mut builder = BooleanBufferBuilder::new(10);
        builder.append_n(5, true);
        builder.append_n(5, false);
        let bit_buffer = BitBuffer::from(builder.finish());

        // Arrow -> BitBuffer keeps every bit.
        assert!((0..5).all(|i| bit_buffer.value(i)));
        assert!((5..10).all(|i| !bit_buffer.value(i)));

        // BitBuffer -> Arrow keeps every bit as well.
        let round_tripped = BooleanBuffer::from(bit_buffer);
        assert!((0..5).all(|i| round_tripped.value(i)));
        assert!((5..10).all(|i| !round_tripped.value(i)));
    }
}

vortex-buffer/src/bit/buf.rs

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
use crate::{buffer, ByteBuffer};
2+
3+
/// An immutable bitset stored as a packed byte buffer.
4+
pub struct BitBuffer {
5+
buffer: ByteBuffer,
6+
len: usize,
7+
offset: usize,
8+
}
9+
10+
impl BitBuffer {
11+
/// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view.
12+
///
13+
/// Panics if the buffer is not large enough to hold `len` bits.
14+
pub fn new(buffer: ByteBuffer, len: usize) -> Self {
15+
assert!(
16+
buffer.len() * 8 >= len,
17+
"provided ByteBuffer not large enough to back BoolBuffer with len {len}"
18+
);
19+
20+
Self {
21+
buffer,
22+
len,
23+
offset: 0,
24+
}
25+
}
26+
27+
/// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view, starting at the
28+
/// given `offset` (in bits).
29+
///
30+
/// Panics if the buffer is not large enough to hold `len` bits or if the offset is greater than
31+
pub fn new_with_offset(buffer: ByteBuffer, len: usize, offset: usize) -> Self {
32+
assert!(
33+
offset < len,
34+
"cannot construct new BoolBuffer with offset {offset} len {len}"
35+
);
36+
assert!(
37+
buffer.len() * 8 >= len - offset,
38+
"provided ByteBuffer not large enough to back BoolBuffer with offset {offset} len {len}"
39+
);
40+
41+
Self {
42+
buffer,
43+
len,
44+
offset,
45+
}
46+
}
47+
48+
/// Create a new `BoolBuffer` of length `len` where all bits are set (true).
49+
pub fn new_set(len: usize) -> Self {
50+
let words = len.div_ceil(8);
51+
let buffer = buffer![0xFF; words];
52+
53+
Self {
54+
buffer,
55+
len,
56+
offset: 0,
57+
}
58+
}
59+
60+
/// Create a new `BoolBuffer` of length `len` where all bits are unset (false).
61+
pub fn new_unset(len: usize) -> Self {
62+
let words = len.div_ceil(8);
63+
let buffer = buffer![0u8; words];
64+
65+
Self {
66+
buffer,
67+
len,
68+
offset: 0,
69+
}
70+
}
71+
72+
/// Get the logical length of this `BoolBuffer`.
73+
///
74+
/// This may differ from the physical length of the backing buffer, for example if it was
75+
/// created using the `new_with_offset` constructor, or if it was sliced.
76+
pub fn len(&self) -> usize {
77+
self.len
78+
}
79+
80+
/// Returns `true` if the `BoolBuffer` is empty.
81+
pub fn is_empty(&self) -> bool {
82+
self.len() == 0
83+
}
84+
85+
/// Offset of start of the buffer in bits.
86+
pub fn offset(&self) -> usize {
87+
self.offset
88+
}
89+
90+
/// Retrieve the value at the given index.
91+
///
92+
/// Panics if the index is out of bounds.
93+
pub fn value(&self, index: usize) -> bool {
94+
assert!(index < self.len, "index {index} exceeds len {}", self.len);
95+
96+
let word_index = (self.offset + index) / 8;
97+
let bit_index = (self.offset + index) % 8;
98+
let word = self.buffer[word_index];
99+
let bit = word & (1 << bit_index);
100+
101+
bit != 0
102+
}
103+
104+
/// Create a new zero-copy slice of this BoolBuffer that begins at the `start` index and extends
105+
/// for `len` bits.
106+
///
107+
/// Panics if the slice would extend beyond the end of the buffer.
108+
pub fn slice(&self, start: usize, len: usize) -> Self {
109+
assert!(
110+
start + len <= self.len,
111+
"slice of len {len} starting at {start} exceeds len {}",
112+
self.len
113+
);
114+
115+
Self {
116+
buffer: self.buffer.clone(),
117+
len,
118+
offset: self.offset + start,
119+
}
120+
}
121+
122+
/// Get the number of set bits in the buffer.
123+
pub fn true_count(&self) -> usize {
124+
if self.is_empty() {
125+
return 0;
126+
}
127+
128+
let first_word = self.offset.div_ceil(8);
129+
let last_word = (self.offset + self.len).div_ceil(8);
130+
131+
let mut ones = 0;
132+
for word in first_word..last_word {
133+
let mut mask = 0xFF;
134+
if word == first_word {
135+
// Mask off the top bits
136+
mask &= 0xFF << (self.offset % 8);
137+
}
138+
139+
if word == last_word {
140+
// Mask off the bottom bits
141+
mask &= 0xFF >> (8 - (self.offset + self.len) % 8);
142+
}
143+
144+
ones += (self.buffer[word] & mask).count_ones();
145+
}
146+
147+
ones as usize
148+
}
149+
150+
/// Get the number of unset bits in the buffer.
151+
pub fn false_count(&self) -> usize {
152+
self.len - self.true_count()
153+
}
154+
}
155+
156+
// Conversions
157+
158+
impl BitBuffer {
159+
/// Consumes this `BoolBuffer` and returns the backing `Buffer<u64>` with any offset
160+
/// and length information applied.
161+
pub fn into_inner(self) -> ByteBuffer {
162+
let word_start = self.offset / 8;
163+
let word_end = (self.offset + self.len).div_ceil(8);
164+
165+
self.buffer.slice(word_start..word_end)
166+
}
167+
}
168+
169+
#[cfg(test)]
mod tests {
    use crate::bit::BitBuffer;
    use crate::{buffer, ByteBuffer};

    /// Exercises length, counting, bit access and slicing on a byte-aligned buffer.
    #[test]
    fn test_bool() {
        // 1024 bytes, each with only bit 7 (the most significant bit) set.
        let bytes: ByteBuffer = buffer![1 << 7; 1024];
        let bools = BitBuffer::new(bytes, 1024 * 8);

        // sanity checks
        assert_eq!(bools.len(), 1024 * 8);
        assert!(!bools.is_empty());
        assert_eq!(bools.true_count(), 1024);
        assert_eq!(bools.false_count(), 1024 * 7);

        // Exactly the last bit of every byte must be set.
        for byte in 0..1024 {
            for bit in 0..8 {
                assert_eq!(bools.value(byte * 8 + bit), bit == 7);
            }
        }

        // Take a one-byte view starting at bit 64.
        let sliced = bools.slice(64, 8);

        // sanity checks
        assert_eq!(sliced.len(), 8);
        assert!(!sliced.is_empty());
        assert_eq!(sliced.true_count(), 1);
        assert_eq!(sliced.false_count(), 7);

        // The view must show the same pattern as a single byte of the parent.
        for bit in 0..8 {
            assert_eq!(sliced.value(bit), bit == 7);
        }
    }
}

0 commit comments

Comments
 (0)