Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
bd5c880
bpf: move bpf_struct_ops_link into bpf.h
rgushchin Oct 27, 2025
50bc105
bpf: initial support for attaching struct ops to cgroups
rgushchin Oct 27, 2025
080fade
bpf: mark struct oom_control's memcg field as TRUSTED_OR_NULL
rgushchin Oct 27, 2025
9d413dc
mm: define mem_cgroup_get_from_ino() outside of CONFIG_SHRINKER_DEBUG
rgushchin Oct 27, 2025
05c05d5
mm: declare memcg_page_state_output() in memcontrol.h
rgushchin Oct 27, 2025
7004b6b
mm: introduce BPF struct ops for OOM handling
rgushchin Oct 27, 2025
4c13dfd
mm: introduce bpf_oom_kill_process() bpf kfunc
rgushchin Oct 27, 2025
11c68d9
mm: introduce BPF kfuncs to deal with memcg pointers
rgushchin Oct 27, 2025
e13e9ed
mm: introduce bpf_get_root_mem_cgroup() BPF kfunc
rgushchin Oct 27, 2025
61a8809
mm: introduce BPF kfuncs to access memcg statistics and events
rgushchin Oct 27, 2025
70dac40
mm: introduce BPF kfunc to access memory events
inwardvessel Oct 27, 2025
50a1763
bpf: selftests: selftests for memcg stat kfuncs
inwardvessel Oct 27, 2025
5887a89
mm: introduce bpf_out_of_memory() BPF kfunc
rgushchin Oct 27, 2025
6b1183c
mm: allow specifying custom oom constraint for BPF triggers
rgushchin Oct 27, 2025
cec44a5
mm: introduce bpf_task_is_oom_victim() kfunc
rgushchin Oct 27, 2025
2b9866e
libbpf: introduce bpf_map__attach_struct_ops_opts()
rgushchin Oct 27, 2025
8d16ddb
bpf: selftests: introduce read_cgroup_file() helper
rgushchin Oct 27, 2025
add67d3
bpf: selftests: BPF OOM handler test
rgushchin Oct 27, 2025
46f6188
sched: psi: refactor psi_trigger_create()
rgushchin Oct 27, 2025
99467d7
sched: psi: implement bpf_psi struct ops
rgushchin Oct 27, 2025
2afd12d
sched: psi: implement bpf_psi_create_trigger() kfunc
rgushchin Oct 27, 2025
c41c577
bpf: selftests: add config for psi
inwardvessel Oct 27, 2025
ea82851
bpf: selftests: PSI struct ops test
rgushchin Oct 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1845,6 +1845,13 @@ struct bpf_raw_tp_link {
u64 cookie;
};

/*
 * Link object for a struct_ops attachment.
 */
struct bpf_struct_ops_link {
/* Generic bpf_link, embedded as the first member. */
struct bpf_link link;
/* RCU-annotated pointer to the associated bpf_map. */
struct bpf_map __rcu *map;
/* NOTE(review): presumably waited on by pollers until the link is detached - confirm. */
wait_queue_head_t wait_hup;
/*
 * NOTE(review): presumably the id of the cgroup the struct ops is
 * attached to, with 0 meaning "no cgroup" - confirm against the attach path.
 */
u64 cgroup_id;
};

struct bpf_link_primer {
struct bpf_link *link;
struct file *file;
Expand Down
74 changes: 74 additions & 0 deletions include/linux/bpf_oom.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/* SPDX-License-Identifier: GPL-2.0+ */

#ifndef __BPF_OOM_H
#define __BPF_OOM_H

struct oom_control;

#define BPF_OOM_NAME_MAX_LEN 64

/*
 * Execution context handed to the bpf_oom_ops callbacks as their
 * first argument.
 */
struct bpf_oom_ctx {
/*
* If bpf_oom_ops is attached to a cgroup, id of this cgroup.
* 0 otherwise.
*/
u64 cgroup_id;
};

/*
 * Set of callbacks (plus a policy name) describing a BPF OOM handling
 * policy. Every callback receives a struct bpf_oom_ctx as its first
 * argument.
 */
struct bpf_oom_ops {
/**
* @handle_out_of_memory: Out of memory bpf handler, called before
* the in-kernel OOM killer.
* @ctx: Execution context
* @oc: OOM control structure
*
* Should return 1 if some memory was freed up. Otherwise
* the in-kernel OOM killer is invoked.
*/
int (*handle_out_of_memory)(struct bpf_oom_ctx *ctx, struct oom_control *oc);

/**
* @handle_cgroup_offline: Cgroup offline callback
* @ctx: Execution context
* @cgroup_id: Id of the deleted cgroup
*
* Called if the cgroup with the attached bpf_oom_ops is deleted.
*/
void (*handle_cgroup_offline)(struct bpf_oom_ctx *ctx, u64 cgroup_id);

/**
* @name: BPF OOM policy name, at most BPF_OOM_NAME_MAX_LEN bytes
* of storage.
*/
char name[BPF_OOM_NAME_MAX_LEN];
};

/*
 * Forward declaration: struct mem_cgroup is only used as an opaque pointer
 * type below. Without it the tag would be scoped to each parameter list,
 * making the prototypes incompatible with callers' struct mem_cgroup.
 */
struct mem_cgroup;

#ifdef CONFIG_BPF_SYSCALL
/**
 * bpf_handle_oom() - handle out of memory condition using bpf
 * @oc: OOM control structure
 *
 * Return: true if some memory was freed.
 */
bool bpf_handle_oom(struct oom_control *oc);

/**
 * bpf_oom_memcg_offline() - handle memcg offlining
 * @memcg: Memory cgroup being offlined
 *
 * When a memory cgroup is about to be deleted and there is an
 * attached BPF OOM structure, it has to be detached.
 */
void bpf_oom_memcg_offline(struct mem_cgroup *memcg);

#else /* CONFIG_BPF_SYSCALL */
/* Stub: with CONFIG_BPF_SYSCALL disabled this always reports no memory freed. */
static inline bool bpf_handle_oom(struct oom_control *oc)
{
	return false;
}

/* Stub: no-op when CONFIG_BPF_SYSCALL is disabled. */
static inline void bpf_oom_memcg_offline(struct mem_cgroup *memcg) {}

#endif /* CONFIG_BPF_SYSCALL */

#endif /* __BPF_OOM_H */
87 changes: 87 additions & 0 deletions include/linux/bpf_psi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/* SPDX-License-Identifier: GPL-2.0+ */

#ifndef __BPF_PSI_H
#define __BPF_PSI_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
#include <linux/psi_types.h>

struct cgroup;
struct bpf_psi;
struct psi_trigger;
struct psi_trigger_params;

#define BPF_PSI_FULL 0x80000000

/*
 * Callbacks implemented by a BPF program to react to PSI events and to
 * cgroup online/offline notifications.
 */
struct bpf_psi_ops {
/**
* @init: Initialization callback, suited for creating psi triggers.
* @bpf_psi: bpf_psi pointer, can be passed to bpf_psi_create_trigger().
*
* A non-zero return value means that the initialization has failed.
*/
int (*init)(struct bpf_psi *bpf_psi);

/**
* @handle_psi_event: PSI event callback
* @bpf_psi: bpf_psi pointer
* @t: psi_trigger pointer
*/
void (*handle_psi_event)(struct bpf_psi *bpf_psi, struct psi_trigger *t);

/**
* @handle_cgroup_online: Cgroup online callback
* @bpf_psi: bpf_psi pointer
* @cgroup_id: Id of the new cgroup
*
* Called every time a new cgroup is created. Can be used
* to create new psi triggers.
*/
void (*handle_cgroup_online)(struct bpf_psi *bpf_psi, u64 cgroup_id);

/**
* @handle_cgroup_offline: Cgroup offline callback
* @bpf_psi: bpf_psi pointer
* @cgroup_id: Id of the offlined cgroup
*
* Called every time a cgroup with an attached bpf psi trigger is
* offlined.
*/
void (*handle_cgroup_offline)(struct bpf_psi *bpf_psi, u64 cgroup_id);

/* private */
struct bpf_psi *bpf_psi;
};

/*
 * Kernel-side state paired with a struct bpf_psi_ops; a pointer to it is
 * passed as the first argument of every bpf_psi_ops callback.
 */
struct bpf_psi {
/* NOTE(review): presumably protects the triggers list - confirm. */
spinlock_t lock;
/* NOTE(review): presumably the psi triggers owned by this bpf_psi - confirm. */
struct list_head triggers;
/* Callback table associated with this object. */
struct bpf_psi_ops *ops;
/* NOTE(review): presumably guards callback invocation against teardown - confirm. */
struct srcu_struct srcu;
struct list_head node; /* Protected by bpf_psi_lock */
};

/*
 * Hooks taking a psi_trigger: add, remove and handle-event.
 * Real implementations are declared when CONFIG_BPF_SYSCALL is enabled;
 * otherwise all three are empty inline stubs.
 * NOTE(review): presumably called from the PSI trigger code - confirm callers.
 */
#ifdef CONFIG_BPF_SYSCALL
void bpf_psi_add_trigger(struct psi_trigger *t,
const struct psi_trigger_params *params);
void bpf_psi_remove_trigger(struct psi_trigger *t);
void bpf_psi_handle_event(struct psi_trigger *t);

#else /* CONFIG_BPF_SYSCALL */
/* No-op stubs: with CONFIG_BPF_SYSCALL disabled these calls do nothing. */
static inline void bpf_psi_add_trigger(struct psi_trigger *t,
const struct psi_trigger_params *params) {}
static inline void bpf_psi_remove_trigger(struct psi_trigger *t) {}
static inline void bpf_psi_handle_event(struct psi_trigger *t) {}

#endif /* CONFIG_BPF_SYSCALL */

/*
 * Cgroup online/offline notification hooks. Real implementations are
 * declared only when CONFIG_CGROUPS, CONFIG_PSI and CONFIG_BPF_SYSCALL
 * are all enabled; otherwise both are empty inline stubs.
 */
#if (defined(CONFIG_CGROUPS) && defined(CONFIG_PSI) && defined(CONFIG_BPF_SYSCALL))
void bpf_psi_cgroup_online(struct cgroup *cgroup);
void bpf_psi_cgroup_offline(struct cgroup *cgroup);

#else /* CONFIG_CGROUPS && CONFIG_PSI && CONFIG_BPF_SYSCALL */
/* No-op stubs used when any of the three options is disabled. */
static inline void bpf_psi_cgroup_online(struct cgroup *cgroup) {}
static inline void bpf_psi_cgroup_offline(struct cgroup *cgroup) {}

#endif /* CONFIG_CGROUPS && CONFIG_PSI && CONFIG_BPF_SYSCALL */

#endif /* __BPF_PSI_H */
4 changes: 4 additions & 0 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,10 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task,

/* !CONFIG_CGROUPS stub: does nothing and leaves @buf untouched. */
static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
{}
/*
 * !CONFIG_CGROUPS stub: always returns NULL, whatever @id is, so callers
 * must be prepared for a NULL result.
 */
static inline struct cgroup *cgroup_get_from_id(u64 id)
{
return NULL;
}
#endif /* !CONFIG_CGROUPS */

#ifdef CONFIG_CGROUPS
Expand Down
12 changes: 10 additions & 2 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct obj_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;
struct bpf_oom_ops;

/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
Expand Down Expand Up @@ -226,6 +227,10 @@ struct mem_cgroup {
*/
bool oom_group;

#ifdef CONFIG_BPF_SYSCALL
struct bpf_oom_ops *bpf_oom;
#endif

int swappiness;

/* memory.events and memory.events.local */
Expand Down Expand Up @@ -832,9 +837,9 @@ static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg)
{
return memcg ? cgroup_ino(memcg->css.cgroup) : 0;
}
#endif

struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino);
#endif

static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
{
Expand Down Expand Up @@ -948,7 +953,10 @@ static inline void mod_memcg_page_state(struct page *page,
rcu_read_unlock();
}

unsigned long memcg_events(struct mem_cgroup *memcg, int event);
unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap);
unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item);
unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx);
unsigned long lruvec_page_state_local(struct lruvec *lruvec,
enum node_stat_item idx);
Expand Down Expand Up @@ -1331,12 +1339,12 @@ static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg)
{
return 0;
}
#endif

/*
 * Stub variant: always returns NULL regardless of @ino.
 * NOTE(review): the enclosing #if/#else is outside this view; presumably
 * this is the !CONFIG_MEMCG branch - confirm.
 */
static inline struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino)
{
return NULL;
}
#endif

static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
{
Expand Down
17 changes: 17 additions & 0 deletions include/linux/oom.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ enum oom_constraint {
CONSTRAINT_CPUSET,
CONSTRAINT_MEMORY_POLICY,
CONSTRAINT_MEMCG,
CONSTRAINT_BPF,
};

/*
 * Bit flags for BPF-initiated OOM handling.
 * NOTE(review): presumably passed to the bpf_out_of_memory() kfunc - confirm.
 */
enum bpf_oom_flags {
/* NOTE(review): presumably requests waiting for oom_lock rather than failing if it is contended - confirm. */
BPF_OOM_FLAGS_WAIT_ON_OOM_LOCK = 1 << 0,
/* NOTE(review): looks like an end marker (first bit past the defined flags) used for validation - confirm. */
BPF_OOM_FLAGS_LAST = 1 << 1,
};

/*
Expand Down Expand Up @@ -51,6 +57,17 @@ struct oom_control {

/* Used to print the constraint info. */
enum oom_constraint constraint;

#ifdef CONFIG_BPF_SYSCALL
/* Used by the bpf oom implementation to mark the forward progress */
bool bpf_memory_freed;

/* Policy name */
const char *bpf_policy_name;

/* BPF-specific constraint name */
const char *bpf_constraint;
#endif
};

extern struct mutex oom_lock;
Expand Down
21 changes: 18 additions & 3 deletions include/linux/psi.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,23 @@ void psi_memstall_enter(unsigned long *flags);
void psi_memstall_leave(unsigned long *flags);

int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
enum psi_res res, struct file *file,
struct kernfs_open_file *of);
int psi_trigger_parse(struct psi_trigger_params *params, const char *buf);
struct psi_trigger *psi_trigger_create(struct psi_group *group,
const struct psi_trigger_params *param);
void psi_trigger_destroy(struct psi_trigger *t);

__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
poll_table *wait);

static inline bool psi_file_privileged(struct file *file)
{
/*
* Checking the privilege here on file->f_cred implies that a privileged user
* could open the file and delegate the write to an unprivileged one.
*/
return cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
}

#ifdef CONFIG_CGROUPS
static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
{
Expand All @@ -41,6 +50,12 @@ int psi_cgroup_alloc(struct cgroup *cgrp);
void psi_cgroup_free(struct cgroup *cgrp);
void cgroup_move_task(struct task_struct *p, struct css_set *to);
void psi_cgroup_restart(struct psi_group *group);

#else
/*
 * Fallback for the #else branch of CONFIG_CGROUPS: @cgrp is ignored and
 * the address of psi_system is returned.
 */
static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
{
return &psi_system;
}
#endif

#else /* CONFIG_PSI */
Expand Down
Loading
Loading