Skip to content

Commit d93c955

Browse files
committed
feat: recognize and use over sized allocations
Allocators are allowed to return a larger memory chunk than was asked for. If the extra amount is large enough, the hash map can use the extra space. The Global allocator will not hit this path, because it won't over-size enough to matter, but custom allocators may. An example of an allocator which allocates full system pages is included in the test suite (Unix only because it uses `mmap`).
1 parent f637220 commit d93c955

File tree

4 files changed

+190
-31
lines changed

4 files changed

+190
-31
lines changed

Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ doc-comment = "0.3.1"
4646
bumpalo = { version = "3.13.0", features = ["allocator-api2"] }
4747
rkyv = { version = "0.7.42", features = ["validation"] }
4848

49+
[target.'cfg(unix)'.dev-dependencies]
50+
libc = "0.2"
51+
4952
[features]
5053
default = ["ahash", "inline-more", "allocator-api2"]
5154

src/map.rs

+100
Original file line numberDiff line numberDiff line change
@@ -8958,3 +8958,103 @@ mod test_map {
89588958
assert_eq!(dropped.load(Ordering::SeqCst), 0);
89598959
}
89608960
}
8961+
8962+
#[cfg(all(test, unix))]
mod test_map_with_mmap_allocations {
    use super::HashMap;
    use allocator_api2::alloc::{AllocError, Allocator};
    use core::alloc::Layout;
    use core::ptr::{null_mut, NonNull};

    /// A toy allocator that backs every request with whole pages from `mmap`,
    /// so the block it hands back is usually larger than what was requested.
    ///
    /// Only good enough for basic tests; not production quality.
    #[derive(Clone, Copy, Debug)]
    struct MmapAllocator {
        /// Page size reported by `sysconf`; `new` rejects anything that is
        /// not a power of 2.
        page_size: usize,
    }

    impl MmapAllocator {
        /// Queries the page size and builds the allocator.
        ///
        /// Fails if `sysconf` reports a value below 1 or the page size is
        /// not a power of 2.
        fn new() -> Result<Self, AllocError> {
            // SAFETY: `sysconf` is called with a constant name and touches no
            // memory owned by this program.
            let reported = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
            if reported < 1 {
                return Err(AllocError);
            }

            let page_size = reported as usize;
            if page_size.is_power_of_two() {
                Ok(Self { page_size })
            } else {
                Err(AllocError)
            }
        }

        /// Rounds `n` up to a multiple of the page size.
        ///
        /// A request for zero bytes still gets one full page (wasteful, but
        /// simple). Fails if rounding up would overflow `usize`.
        fn fit_to_page_size(&self, n: usize) -> Result<usize, AllocError> {
            let page = self.page_size;
            if n == 0 {
                return Ok(page);
            }

            // `page` is a power of 2, so masking yields `n % page`.
            let rem = n & (page - 1);
            if rem == 0 {
                Ok(n)
            } else {
                n.checked_add(page - rem).ok_or(AllocError)
            }
        }
    }

    unsafe impl Allocator for MmapAllocator {
        /// Maps enough anonymous, private, read/write pages to hold `layout`
        /// and reports the full mapped length in the returned slice.
        fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
            // Alignments larger than a page are refused outright.
            if layout.align() > self.page_size {
                return Err(AllocError);
            }

            let size = self.fit_to_page_size(layout.size())?;
            // SAFETY: an anonymous private mapping requested with a null
            // address hint does not alias or modify any existing mapping.
            let mapping = unsafe {
                libc::mmap(
                    null_mut(),
                    size as libc::size_t,
                    libc::PROT_READ | libc::PROT_WRITE,
                    libc::MAP_PRIVATE | libc::MAP_ANON,
                    -1,
                    0,
                )
            };
            if mapping == libc::MAP_FAILED {
                return Err(AllocError);
            }

            match NonNull::new(mapping.cast()) {
                Some(addr) => Ok(NonNull::slice_from_raw_parts(addr, size)),
                None => Err(AllocError),
            }
        }

        /// Unmaps the pages that were mapped for `layout`.
        unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
            // The same layout rounded successfully in `allocate`, so it must
            // round successfully here too.
            let size = self.fit_to_page_size(layout.size()).unwrap();
            // The result of `munmap` is deliberately ignored.
            let _ = libc::munmap(ptr.as_ptr().cast(), size);
        }
    }

    #[test]
    fn test_tiny_allocation_gets_rounded_to_page_size() {
        let alloc = MmapAllocator::new().unwrap();
        let mut map: HashMap<usize, (), _, _> = HashMap::with_capacity_in(1, alloc);

        // Estimate how many buckets a single page can hold. Taking ¾ of that
        // should leave room for control bytes and the load factor, at least
        // for realistic page sizes (4096+).
        let rough_bucket_size = core::mem::size_of::<(usize, (), usize)>();
        let rough_buckets_per_page = alloc.page_size / rough_bucket_size;
        let min_elems = rough_buckets_per_page / 4 * 3;
        let capacity = map.capacity();
        assert!(capacity > min_elems, "failed: {capacity} > {min_elems}");

        // Insert exactly `capacity` distinct keys.
        for key in 0..capacity {
            map.insert(key, ());
        }
        // The map must now be full without having grown.
        assert_eq!(capacity, map.len());
        assert_eq!(capacity, map.capacity());

        // One more element has to force a resize.
        map.insert(capacity, ());
        assert!(
            capacity < map.capacity(),
            "failed: {capacity} < {}",
            map.capacity()
        );
    }
}

src/raw/alloc.rs

+17-28
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
1-
pub(crate) use self::inner::{do_alloc, Allocator, Global};
1+
pub(crate) use self::inner::{Allocator, Global};
2+
use crate::alloc::alloc::Layout;
3+
use core::ptr::NonNull;
4+
5+
#[allow(clippy::map_err_ignore)]
6+
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<[u8]>, ()> {
7+
match alloc.allocate(layout) {
8+
Ok(ptr) => Ok(ptr),
9+
Err(_) => Err(()),
10+
}
11+
}
212

313
// Nightly-case.
414
// Use unstable `allocator_api` feature.
515
// This is compatible with `allocator-api2` which can be enabled or not.
616
// This is used when building for `std`.
717
#[cfg(feature = "nightly")]
818
mod inner {
9-
use crate::alloc::alloc::Layout;
1019
pub use crate::alloc::alloc::{Allocator, Global};
11-
use core::ptr::NonNull;
12-
13-
#[allow(clippy::map_err_ignore)]
14-
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<u8>, ()> {
15-
match alloc.allocate(layout) {
16-
Ok(ptr) => Ok(ptr.as_non_null_ptr()),
17-
Err(_) => Err(()),
18-
}
19-
}
2020
}
2121

2222
// Basic non-nightly case.
@@ -27,17 +27,7 @@ mod inner {
2727
// `core::alloc::Allocator`.
2828
#[cfg(all(not(feature = "nightly"), feature = "allocator-api2"))]
2929
mod inner {
30-
use crate::alloc::alloc::Layout;
3130
pub use allocator_api2::alloc::{Allocator, Global};
32-
use core::ptr::NonNull;
33-
34-
#[allow(clippy::map_err_ignore)]
35-
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<u8>, ()> {
36-
match alloc.allocate(layout) {
37-
Ok(ptr) => Ok(ptr.cast()),
38-
Err(_) => Err(()),
39-
}
40-
}
4131
}
4232

4333
// No-defaults case.
@@ -55,7 +45,7 @@ mod inner {
5545

5646
#[allow(clippy::missing_safety_doc)] // not exposed outside of this crate
5747
pub unsafe trait Allocator {
58-
fn allocate(&self, layout: Layout) -> Result<NonNull<u8>, ()>;
48+
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, ()>;
5949
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout);
6050
}
6151

@@ -64,8 +54,11 @@ mod inner {
6454

6555
unsafe impl Allocator for Global {
6656
#[inline]
67-
fn allocate(&self, layout: Layout) -> Result<NonNull<u8>, ()> {
68-
unsafe { NonNull::new(alloc(layout)).ok_or(()) }
57+
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, ()> {
58+
match unsafe { NonNull::new(alloc(layout)) } {
59+
Some(ptr) => Ok(NonNull::slice_from_raw_parts(ptr, layout.size())),
60+
None => Err(()),
61+
}
6962
}
7063
#[inline]
7164
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
@@ -79,8 +72,4 @@ mod inner {
7972
Global
8073
}
8174
}
82-
83-
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<u8>, ()> {
84-
alloc.allocate(layout)
85-
}
8675
}

src/raw/mod.rs

+70-3
Original file line numberDiff line numberDiff line change
@@ -1736,6 +1736,40 @@ impl RawTableInner {
17361736
}
17371737
}
17381738

1739+
/// Returns the largest power of 2 that is less than or equal to `z`.
///
/// If `z` is already a power of 2 it is returned unchanged.
///
/// `z` must be non-zero. Zero has no set bit, so the shift amount computed
/// below underflows: builds with overflow checks panic, and builds without
/// them return a meaningless value.
fn prev_pow2(z: usize) -> usize {
    debug_assert!(z != 0, "prev_pow2 is not defined for zero");
    // Index of the highest set bit of `z`.
    let highest_bit = usize::BITS - 1 - z.leading_zeros();
    1 << highest_bit
}
1745+
1746+
fn maximum_buckets_in(
1747+
allocation_size: usize,
1748+
table_layout: TableLayout,
1749+
group_width: usize,
1750+
) -> usize {
1751+
// Given an equation like:
1752+
// z >= x * y + x + g
1753+
// x can be maximized by doing:
1754+
// x = (z - g) / (y + 1)
1755+
// If you squint:
1756+
// x is the number of buckets
1757+
// y is the table_layout.size
1758+
// z is the size of the allocation
1759+
// g is the group width
1760+
// But this is ignoring the padding needed for ctrl_align.
1761+
// If we remember these restrictions:
1762+
// x is always a power of 2
1763+
// Layout size for T must always be a multiple of T
1764+
// Then the alignment can be ignored if we add the constraint:
1765+
// x * y >= table_layout.ctrl_align
1766+
// This is taken care of by `capacity_to_buckets`.
1767+
let numerator = allocation_size - group_width;
1768+
let denominator = table_layout.size + 1; // todo: ZSTs?
1769+
let quotient = numerator / denominator;
1770+
prev_pow2(quotient)
1771+
}
1772+
17391773
impl RawTableInner {
17401774
/// Allocates a new [`RawTableInner`] with the given number of buckets.
17411775
/// The control bytes and buckets are left uninitialized.
@@ -1753,7 +1787,7 @@ impl RawTableInner {
17531787
unsafe fn new_uninitialized<A>(
17541788
alloc: &A,
17551789
table_layout: TableLayout,
1756-
buckets: usize,
1790+
mut buckets: usize,
17571791
fallibility: Fallibility,
17581792
) -> Result<Self, TryReserveError>
17591793
where
@@ -1762,13 +1796,29 @@ impl RawTableInner {
17621796
debug_assert!(buckets.is_power_of_two());
17631797

17641798
// Avoid `Option::ok_or_else` because it bloats LLVM IR.
1765-
let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) {
1799+
let (layout, mut ctrl_offset) = match table_layout.calculate_layout_for(buckets) {
17661800
Some(lco) => lco,
17671801
None => return Err(fallibility.capacity_overflow()),
17681802
};
17691803

17701804
let ptr: NonNull<u8> = match do_alloc(alloc, layout) {
1771-
Ok(block) => block.cast(),
1805+
Ok(block) => {
1806+
// Utilize over-sized allocations.
1807+
let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH);
1808+
debug_assert!(x >= buckets);
1809+
// Calculate the new ctrl_offset.
1810+
let (_oversized_layout, oversized_ctrl_offset) =
1811+
match table_layout.calculate_layout_for(x) {
1812+
Some(lco) => lco,
1813+
None => unsafe { hint::unreachable_unchecked() },
1814+
};
1815+
debug_assert!(_oversized_layout.size() <= block.len());
1816+
debug_assert!(oversized_ctrl_offset >= ctrl_offset);
1817+
ctrl_offset = oversized_ctrl_offset;
1818+
buckets = x;
1819+
1820+
block.cast()
1821+
}
17721822
Err(_) => return Err(fallibility.alloc_err(layout)),
17731823
};
17741824

@@ -4586,6 +4636,23 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
45864636
mod test_map {
45874637
use super::*;
45884638

4639+
#[test]
fn test_prev_pow2() {
    // Zero is never passed in: `prev_pow2` is not defined for it.
    // Visit every power of 2 whose double still fits in a `usize`.
    for exponent in 0..usize::BITS - 1 {
        let pow2: usize = 1 << exponent;
        let next_pow2 = pow2 << 1;

        // A power of 2 maps to itself.
        assert_eq!(pow2, prev_pow2(pow2));

        // For `pow2 == 1` the neighbour `pow2 + 1` is 2, which is itself a
        // power of 2 and so does not round down to 1. Skip that case.
        if exponent > 0 {
            assert_eq!(pow2, prev_pow2(pow2 + 1));
            assert_eq!(pow2, prev_pow2(next_pow2 - 1));
        }
    }
}
4655+
45894656
#[test]
45904657
fn test_minimum_capacity_for_small_types() {
45914658
#[track_caller]

0 commit comments

Comments
 (0)