Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce memory used to store inode names #1305

Merged
merged 2 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 26 additions & 44 deletions mountpoint-s3/src/superblock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,15 @@ mod expiry;
use expiry::Expiry;

mod inode;
use inode::{valid_inode_name, InodeErrorInfo, InodeKindData, InodeStat, InodeState, WriteStatus};

pub use inode::{Inode, InodeKind, InodeNo, ReadHandle, WriteHandle, WriteMode};
use inode::{InodeErrorInfo, InodeKindData, InodeStat, InodeState, WriteStatus};

mod negative_cache;
use negative_cache::NegativeCache;

pub mod path;
use path::ValidName;

mod readdir;
pub use readdir::ReaddirHandle;

Expand Down Expand Up @@ -333,9 +335,7 @@ impl Superblock {
}

// Should be impossible to fail since [lookup] does this check, but let's be sure
let name = name
.to_str()
.ok_or_else(|| InodeError::InvalidFileName(name.to_owned()))?;
let name: ValidName = name.try_into()?;

// Put inode creation in a block so we don't hold the lock on the parent state longer than needed.
let lookup = {
Expand All @@ -348,7 +348,7 @@ impl Superblock {
let InodeKindData::Directory { children, .. } = &mut parent_state.kind_data else {
return Err(InodeError::NotADirectory(parent_inode.err()));
};
if let Some(inode) = children.get(name) {
if let Some(inode) = children.get(name.as_ref()) {
return Err(InodeError::FileAlreadyExists(inode.err()));
}

Expand Down Expand Up @@ -583,26 +583,18 @@ impl SuperblockInner {
name: &OsStr,
allow_cache: bool,
) -> Result<LookedUp, InodeError> {
let name = name
.to_str()
.ok_or_else(|| InodeError::InvalidFileName(name.to_owned()))?;

// This should be impossible, but just to be safe, explicitly reject lookups to files that
// end with '/', since they could be shadowed by directories.
if name.ends_with('/') {
return Err(InodeError::InvalidFileName(name.into()));
}
let name: ValidName = name.try_into()?;

let lookup = if allow_cache {
self.cache_lookup(parent_ino, name)
self.cache_lookup(parent_ino, &name)
} else {
None
};

let lookup = match lookup {
Some(lookup) => lookup?,
None => {
let remote = self.remote_lookup(client, parent_ino, name).await?;
let remote = self.remote_lookup(client, parent_ino, &name).await?;
self.update_from_remote(parent_ino, name, remote)?
}
};
Expand Down Expand Up @@ -790,7 +782,7 @@ impl SuperblockInner {
pub fn update_from_remote(
&self,
parent_ino: InodeNo,
name: &str,
name: ValidName,
remote: Option<RemoteLookup>,
) -> Result<LookedUp, InodeError> {
let parent = self.get(parent_ino)?;
Expand All @@ -803,14 +795,14 @@ impl SuperblockInner {
if self.config.cache_config.use_negative_cache {
match &remote {
// Remove negative cache entry.
Some(_) => self.negative_cache.remove(parent_ino, name),
Some(_) => self.negative_cache.remove(parent_ino, &name),
// Insert or update TTL of negative cache entry.
None => self.negative_cache.insert(parent_ino, name),
None => self.negative_cache.insert(parent_ino, &name),
}
}

// Fast path: try with only a read lock on the directory first.
if let Some(looked_up) = Self::try_update_fast_path(&parent, name, &remote)? {
if let Some(looked_up) = Self::try_update_fast_path(&parent, &name, &remote)? {
return Ok(looked_up);
}

Expand Down Expand Up @@ -858,16 +850,16 @@ impl SuperblockInner {
fn update_slow_path(
&self,
parent: Inode,
name: &str,
name: ValidName,
remote: Option<RemoteLookup>,
) -> Result<LookedUp, InodeError> {
let mut parent_state = parent.get_mut_inode_state()?;
let inode = match &parent_state.kind_data {
InodeKindData::File { .. } => unreachable!("we know parent is a directory"),
InodeKindData::Directory { children, .. } => children.get(name).cloned(),
InodeKindData::Directory { children, .. } => children.get(name.as_ref()).cloned(),
};
match (remote, inode) {
(None, None) => Err(InodeError::FileDoesNotExist(name.to_owned(), parent.err())),
(None, None) => Err(InodeError::FileDoesNotExist(name.to_string(), parent.err())),
(None, Some(existing_inode)) => {
let InodeKindData::Directory {
children,
Expand Down Expand Up @@ -896,8 +888,8 @@ impl SuperblockInner {
// This existing inode is local-only (because `remote` is None), but is not
// being written. It must have previously existed but been removed on the remote
// side.
children.remove(name);
Err(InodeError::FileDoesNotExist(name.to_owned(), parent.err()))
children.remove(name.as_ref());
Err(InodeError::FileDoesNotExist(name.to_string(), parent.err()))
}
}
(Some(remote), None) => {
Expand Down Expand Up @@ -984,28 +976,18 @@ impl SuperblockInner {
&self,
parent: &Inode,
parent_locked: &mut InodeState,
name: &str,
name: ValidName,
kind: InodeKind,
state: InodeState,
is_new_file: bool,
) -> Result<Inode, InodeError> {
if !valid_inode_name(name) {
warn!(?name, "invalid file name; {} will not be available", kind.as_str());
return Err(InodeError::InvalidFileName(OsString::from(name)));
}

let key = parent
.valid_key()
.new_child(name, kind)
.map_err(|_| InodeError::NotADirectory(parent.err()))?;
let next_ino = self.next_ino.fetch_add(1, Ordering::SeqCst);

let mut key = parent.key().to_owned();
assert!(key.is_empty() || key.ends_with('/'));
key.push_str(name);
if kind == InodeKind::Directory {
key.push('/');
}

trace!(parent=?parent.ino(), ?name, ?kind, new_ino=?next_ino, ?key, "creating new inode");

let inode = Inode::new(next_ino, parent.ino(), name.to_owned(), key, &self.prefix, kind, state);
let inode = Inode::new(next_ino, parent.ino(), key, &self.prefix, state);
trace!(parent=?inode.parent(), name=?inode.name(), kind=?inode.kind(), new_ino=?inode.ino(), key=?inode.key(), "created new inode");

match &mut parent_locked.kind_data {
InodeKindData::File {} => {
Expand All @@ -1017,7 +999,7 @@ impl SuperblockInner {
writing_children,
..
} => {
let existing_inode = children.insert(name.to_owned(), inode.clone());
let existing_inode = children.insert(name.to_string(), inode.clone());
if is_new_file {
writing_children.insert(next_ino);
}
Expand Down
70 changes: 22 additions & 48 deletions mountpoint-s3/src/superblock/inode.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use std::collections::{HashMap, HashSet};
use std::ffi::OsStr;
use std::fmt::{Debug, Display};
use std::os::unix::ffi::OsStrExt as _;
use std::time::{Duration, SystemTime};

use fuser::FileType;
Expand All @@ -14,6 +12,7 @@ use crate::prefix::Prefix;
use crate::sync::atomic::{AtomicBool, Ordering};
use crate::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};

use super::path::ValidKey;
use super::{Expiry, InodeError, SuperblockInner};

pub type InodeNo = u64;
Expand All @@ -33,11 +32,7 @@ struct InodeInner {
// Immutable inode state -- any changes to these requires a new inode
ino: InodeNo,
parent: InodeNo,
name: String,
// TODO deduplicate keys by string interning or something -- many keys will have common prefixes
/// Object key not including the prefix (ends in '/' for directories).
key: String,
kind: InodeKind,
valid_key: ValidKey,
checksum: Crc32c,

/// Mutable inode state. This lock should also be held to serialize operations on an inode (like
Expand All @@ -63,15 +58,19 @@ impl Inode {
}

pub fn name(&self) -> &str {
&self.inner.name
self.inner.valid_key.name()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice!

}

pub fn kind(&self) -> InodeKind {
self.inner.kind
self.inner.valid_key.kind()
}

pub fn key(&self) -> &str {
&self.inner.key
self.inner.valid_key.as_ref()
}

pub fn valid_key(&self) -> &ValidKey {
&self.inner.valid_key
}

/// Increment lookup count for [Inode] by 1, returning the new value.
Expand Down Expand Up @@ -135,23 +134,13 @@ impl Inode {
}

/// Create a new inode.
pub(super) fn new(
ino: InodeNo,
parent: InodeNo,
name: String,
key: String,
prefix: &Prefix,
kind: InodeKind,
state: InodeState,
) -> Self {
let checksum = Self::compute_checksum(ino, prefix, &key);
pub(super) fn new(ino: InodeNo, parent: InodeNo, key: ValidKey, prefix: &Prefix, state: InodeState) -> Self {
let checksum = Self::compute_checksum(ino, prefix, key.as_ref());
let sync = RwLock::new(state);
let inner = InodeInner {
ino,
parent,
name,
key,
kind,
valid_key: key,
checksum,
sync,
};
Expand All @@ -163,10 +152,8 @@ impl Inode {
Self::new(
ROOT_INODE_NO,
ROOT_INODE_NO,
String::new(),
String::new(),
ValidKey::root(),
prefix,
InodeKind::Directory,
InodeState {
// The root inode never expires because there's no remote to consult for its
// metadata, and it always exists.
Expand Down Expand Up @@ -219,19 +206,6 @@ impl Inode {
}
}

/// Returns `true` when `name` is acceptable as the name of a single inode.
///
/// A name is rejected when it is empty, when it is one of the reserved
/// entries `.` or `..`, or when it contains a `/` or a NUL byte.
pub fn valid_inode_name<T: AsRef<OsStr>>(name: T) -> bool {
    let bytes = name.as_ref().as_bytes();
    match bytes {
        // Empty names are never valid, and "." / ".." are reserved names
        // (presented by the filesystem layer).
        [] | [b'.'] | [b'.', b'.'] => false,
        // The delimiter '/' can never appear in a name, and NUL is invalid
        // in POSIX names.
        _ => !bytes.iter().any(|&byte| byte == b'/' || byte == b'\0'),
    }
}

/// A wrapper that prints useful customer-facing error messages for inodes by including the object
/// key rather than just the inode number.
pub struct InodeErrorInfo(Inode);
Expand Down Expand Up @@ -598,10 +572,10 @@ mod tests {
let inode = Inode::new(
ino,
ROOT_INODE_NO,
inode_name.to_owned(),
inode_name.to_owned(),
ValidKey::root()
.new_child(inode_name.try_into().unwrap(), InodeKind::File)
.unwrap(),
&superblock.inner.prefix,
InodeKind::File,
InodeState {
write_status: WriteStatus::Remote,
stat: InodeStat::for_file(0, OffsetDateTime::now_utc(), None, None, None, Default::default()),
Expand Down Expand Up @@ -731,9 +705,9 @@ mod tests {
inner: Arc::new(InodeInner {
ino: 42,
parent: parent_ino,
name: file_name.into(),
key: file_name.into(),
kind: InodeKind::File,
valid_key: ValidKey::root()
.new_child(file_name.try_into().unwrap(), InodeKind::File)
.unwrap(),
checksum: bad_checksum,
sync: RwLock::new(InodeState {
stat: InodeStat::for_file(
Expand Down Expand Up @@ -792,9 +766,9 @@ mod tests {
inner: Arc::new(InodeInner {
ino,
parent: ROOT_INODE_NO,
name: inode_name.to_owned(),
key: inode_name.to_owned(),
kind: InodeKind::File,
valid_key: ValidKey::root()
.new_child(inode_name.try_into().unwrap(), InodeKind::File)
.unwrap(),
checksum,
sync: RwLock::new(InodeState {
write_status: WriteStatus::LocalOpen,
Expand Down
Loading
Loading