Skip to content

feat(aya): Add task storage map type (in the user-space) #1161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions aya/src/bpf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ fn parse_map(
BPF_MAP_TYPE_DEVMAP => Map::DevMap(map),
BPF_MAP_TYPE_DEVMAP_HASH => Map::DevMapHash(map),
BPF_MAP_TYPE_XSKMAP => Map::XskMap(map),
BPF_MAP_TYPE_TASK_STORAGE => Map::TaskStorage(map),
m_type => {
if allow_unsupported_maps {
Map::Unsupported(map)
Expand Down
8 changes: 8 additions & 0 deletions aya/src/maps/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ pub mod ring_buf;
pub mod sock;
pub mod stack;
pub mod stack_trace;
pub mod task_storage;
pub mod xdp;

pub use array::{Array, PerCpuArray, ProgramArray};
Expand All @@ -103,6 +104,7 @@ pub use ring_buf::RingBuf;
pub use sock::{SockHash, SockMap};
pub use stack::Stack;
pub use stack_trace::StackTraceMap;
pub use task_storage::TaskStorage;
pub use xdp::{CpuMap, DevMap, DevMapHash, XskMap};

#[derive(Error, Debug)]
Expand Down Expand Up @@ -312,6 +314,8 @@ pub enum Map {
Stack(MapData),
/// A [`StackTraceMap`] map.
StackTraceMap(MapData),
/// A [`TaskStorage`] map.
TaskStorage(MapData),
/// An unsupported map type.
Unsupported(MapData),
/// A [`XskMap`] map.
Expand Down Expand Up @@ -341,6 +345,7 @@ impl Map {
Self::SockMap(map) => map.obj.map_type(),
Self::Stack(map) => map.obj.map_type(),
Self::StackTraceMap(map) => map.obj.map_type(),
Self::TaskStorage(map) => map.obj.map_type(),
Self::Unsupported(map) => map.obj.map_type(),
Self::XskMap(map) => map.obj.map_type(),
}
Expand Down Expand Up @@ -371,6 +376,7 @@ impl Map {
Self::SockMap(map) => map.pin(path),
Self::Stack(map) => map.pin(path),
Self::StackTraceMap(map) => map.pin(path),
Self::TaskStorage(map) => map.pin(path),
Self::Unsupported(map) => map.pin(path),
Self::XskMap(map) => map.pin(path),
}
Expand Down Expand Up @@ -420,6 +426,7 @@ impl_map_pin!((V) {
BloomFilter,
Queue,
Stack,
TaskStorage,
});

impl_map_pin!((K, V) {
Expand Down Expand Up @@ -501,6 +508,7 @@ impl_try_from_map!((V) {
Queue,
SockHash,
Stack,
TaskStorage,
});

impl_try_from_map!((K, V) {
Expand Down
150 changes: 150 additions & 0 deletions aya/src/maps/task_storage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
//! Task storage.
use std::{
borrow::Borrow,
marker::PhantomData,
os::fd::{AsFd as _, AsRawFd as _},
};

use crate::{
Pod,
maps::{MapData, MapError, check_kv_size},
sys::{PidFd, SyscallError, bpf_map_lookup_elem},
};

/// Task storage is a type of map which uses `task_struct` kernel type as a
/// key. When the task (process) stops, the corresponding entry is
/// automatically removed.
///
/// # Minimum kernel version
///
/// The minimum kernel version required to use this feature is 5.12.
///
/// # Examples
///
/// ```no_run
/// # let mut ebpf = aya::Ebpf::load(&[])?;
/// use aya::maps::TaskStorage;
///
/// let mut task_storage: TaskStorage<_, u32> = TaskStorage::try_from(ebpf.map_mut("TASK_STORAGE").unwrap())?;
///
/// let pid = 0;
/// let value = task_storage.get(&pid, 0)?;
/// # Ok::<(), aya::EbpfError>(())
/// ```
#[doc(alias = "BPF_MAP_TYPE_TASK_STORAGE")]
#[derive(Debug)]
pub struct TaskStorage<T, V> {
pub(crate) inner: T,
_v: PhantomData<V>,
}

impl<T: Borrow<MapData>, V: Pod> TaskStorage<T, V> {
pub(crate) fn new(map: T) -> Result<Self, MapError> {
let data = map.borrow();
check_kv_size::<u32, V>(data)?;
Ok(Self {
inner: map,
_v: PhantomData,
})
}

/// Returns the value stored for the given `pid`.
pub fn get(&self, pid: &u32, flags: u64) -> Result<V, MapError> {
let pidfd = PidFd::open(*pid, 0).map_err(|(_, io_error)| SyscallError {
call: "pidfd_open",
io_error,
})?;
let map_fd = self.inner.borrow().fd().as_fd();
let value = bpf_map_lookup_elem(map_fd, &pidfd.as_raw_fd(), flags).map_err(|io_error| {
SyscallError {
call: "bpf_map_lookup_elem",
io_error,
}
})?;
value.ok_or(MapError::KeyNotFound)
}
}

#[cfg(test)]
mod tests {
use std::io;

use assert_matches::assert_matches;
use aya_obj::generated::bpf_map_type::BPF_MAP_TYPE_TASK_STORAGE;
use libc::EFAULT;

use super::*;
use crate::{
maps::{
Map,
test_utils::{self, new_map},
},
sys::{SysResult, Syscall, override_syscall},
};

fn new_obj_map() -> aya_obj::Map {
test_utils::new_obj_map::<u32>(BPF_MAP_TYPE_TASK_STORAGE)
}

fn sys_error(value: i32) -> SysResult {
Err((-1, io::Error::from_raw_os_error(value)))
}

#[test]
fn test_wrong_value_size() {
let map = new_map(new_obj_map());
let map = Map::TaskStorage(map);
assert_matches!(
TaskStorage::<_, u16>::try_from(&map),
Err(MapError::InvalidValueSize {
size: 2,
expected: 4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coming back to earlier comment - if you're expecting c_int to always be a 4 byte value, you may as well use u32.

})
);
}

#[test]
fn test_try_from_wrong_map() {
let map = new_map(new_obj_map());
let map = Map::Array(map);
assert_matches!(
TaskStorage::<_, u32>::try_from(&map),
Err(MapError::InvalidMapType { .. })
);
}

#[test]
fn test_new_ok() {
let map = new_map(new_obj_map());
assert!(TaskStorage::<_, u32>::new(&map).is_ok());
}

#[test]
fn test_try_from_ok() {
let map = new_map(new_obj_map());
let map = Map::TaskStorage(map);
assert!(TaskStorage::<_, u32>::try_from(&map).is_ok());
}

#[test]
fn test_get_pidfd_syscall_error() {
let mut map = new_map(new_obj_map());
let map = TaskStorage::<_, u32>::new(&mut map).unwrap();

override_syscall(|call| match call {
Syscall::Ebpf { .. } => Ok(1),
Syscall::PidfdOpen { .. } => sys_error(EFAULT),
_ => sys_error(EFAULT),
});

assert_matches!(
map.get(&1, 0), Err(MapError::SyscallError(
SyscallError {
call: "pidfd_open",
io_error
}
))
if io_error.raw_os_error() == Some(EFAULT)
);
}
}
49 changes: 48 additions & 1 deletion aya/src/sys/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,22 @@ mod fake;
use std::{
ffi::{c_int, c_void},
io,
os::fd::{BorrowedFd, OwnedFd},
os::fd::{AsRawFd, BorrowedFd, FromRawFd as _, OwnedFd, RawFd},
};

use aya_obj::generated::{bpf_attr, bpf_cmd, perf_event_attr};
pub(crate) use bpf::*;
#[cfg(test)]
pub(crate) use fake::*;
use libc::pid_t;
#[doc(hidden)]
pub use netlink::netlink_set_link_up;
pub(crate) use netlink::*;
pub(crate) use perf_event::*;
use thiserror::Error;

use crate::MockableFd;

pub(crate) type SysResult = Result<i64, (i64, io::Error)>;

#[cfg_attr(test, expect(dead_code))]
Expand Down Expand Up @@ -50,6 +53,10 @@ pub(crate) enum Syscall<'a> {
fd: BorrowedFd<'a>,
request: PerfEventIoctlRequest<'a>,
},
PidfdOpen {
pid: pid_t,
flags: u32,
},
}

/// A system call error.
Expand Down Expand Up @@ -90,6 +97,11 @@ impl std::fmt::Debug for Syscall<'_> {
.field("fd", fd)
.field("request", request)
.finish(),
Self::PidfdOpen { pid, flags } => f
.debug_struct("Syscall::PidfdOpen")
.field("pid", pid)
.field("flags", flags)
.finish(),
}
}
}
Expand Down Expand Up @@ -137,6 +149,9 @@ fn syscall(call: Syscall<'_>) -> SysResult {
),
}
}
Syscall::PidfdOpen { pid, flags } => {
libc::syscall(libc::SYS_pidfd_open, pid, flags)
}
}
};
// c_long is i32 on armv7.
Expand Down Expand Up @@ -235,3 +250,35 @@ impl From<Stats> for aya_obj::generated::bpf_stats_type {
pub fn enable_stats(stats_type: Stats) -> Result<OwnedFd, SyscallError> {
bpf_enable_stats(stats_type.into()).map(|fd| fd.into_inner())
}

/// A file descriptor of a process.
///
/// A similar type is provided by the Rust standard library as
/// [`std::os::linux::process`] as a nigtly-only experimental API. We are
/// planning to migrate to it once it stabilizes.
pub(crate) struct PidFd(MockableFd);

impl PidFd {
pub(crate) fn open(pid: u32, flags: u32) -> Result<Self, (i64, io::Error)> {
let pid_fd = pidfd_open(pid, flags)? as RawFd;
let pid_fd = unsafe { MockableFd::from_raw_fd(pid_fd) };
Ok(Self(pid_fd))
}
}

impl AsRawFd for PidFd {
fn as_raw_fd(&self) -> RawFd {
self.0.as_raw_fd()
}
}

fn pidfd_open(pid: u32, flags: u32) -> SysResult {
let call = Syscall::PidfdOpen {
pid: pid as pid_t,
flags,
};
#[cfg(not(test))]
return crate::sys::syscall(call);
#[cfg(test)]
return crate::sys::TEST_SYSCALL.with(|test_impl| unsafe { test_impl.borrow()(call) });
}
1 change: 1 addition & 0 deletions test/integration-test/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ object = { workspace = true, features = ["elf", "read_core", "std"] }
rand = { workspace = true, features = ["thread_rng"] }
rbpf = { workspace = true }
scopeguard = { workspace = true }
tempfile = { workspace = true }
test-case = { workspace = true }
test-log = { workspace = true, features = ["log"] }
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "time"] }
Expand Down
38 changes: 38 additions & 0 deletions test/integration-test/bpf/task_storage.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// clang-format off
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
// clang-format on

char _license[] SEC("license") = "GPL";

struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, __u32);
} task_storage SEC(".maps");

void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;

SEC("tp_btf/sys_enter")
int BPF_PROG(sys_enter, struct pt_regs *regs, long id) {
__u32 value = 1;
struct task_struct *task = bpf_get_current_task_btf();
// This test is triggered by a Rust test, running in a thread. A current task
// (the one returned by `bpf_get_current_task()`) represents that thread. If
// we create a task storage entry for that task, our user-space test will not
// be able to retrieve it as pidfd.
// To make retrieval of the map element by pidfd possible, we need to use the
// `group_leader` (a `struct task_struct*` instance representing the process)
// as the key.
bpf_rcu_read_lock();
struct task_struct *group_leader = BPF_CORE_READ(task, group_leader);
bpf_task_storage_get(&task_storage, group_leader, &value,
BPF_LOCAL_STORAGE_GET_F_CREATE);
bpf_rcu_read_unlock();

return 0;
}
1 change: 1 addition & 0 deletions test/integration-test/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ fn main() -> Result<()> {
("main.bpf.c", false),
("multimap-btf.bpf.c", false),
("reloc.bpf.c", true),
("task_storage.bpf.c", true),
("text_64_64_reloc.c", false),
("variables_reloc.bpf.c", false),
];
Expand Down
2 changes: 2 additions & 0 deletions test/integration-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ pub const MULTIMAP_BTF: &[u8] =
pub const RELOC_BPF: &[u8] = include_bytes_aligned!(concat!(env!("OUT_DIR"), "/reloc.bpf.o"));
pub const RELOC_BTF: &[u8] =
include_bytes_aligned!(concat!(env!("OUT_DIR"), "/reloc.bpf.target.o"));
pub const TASK_STORAGE: &[u8] =
include_bytes_aligned!(concat!(env!("OUT_DIR"), "/task_storage.bpf.o"));
pub const TEXT_64_64_RELOC: &[u8] =
include_bytes_aligned!(concat!(env!("OUT_DIR"), "/text_64_64_reloc.o"));
pub const VARIABLES_RELOC: &[u8] =
Expand Down
1 change: 1 addition & 0 deletions test/integration-test/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod relocations;
mod ring_buf;
mod smoke;
mod strncmp;
mod task_storage;
mod tcx;
mod uprobe_cookie;
mod xdp;
Loading
Loading