Skip to content

bpf, sockmap: Improve performance with CPU affinity #8857

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: bpf-next_base
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Documentation/bpf/map_sockmap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,20 @@ following cases:

Returns 0

bpf_sk_skb_set_redirect_cpu()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: c

int bpf_sk_skb_set_redirect_cpu(struct __sk_buff *s, int redir_cpu)

This kfunc ``bpf_sk_skb_set_redirect_cpu()`` is available to
``BPF_PROG_TYPE_SK_SKB`` BPF programs. It sets the CPU affinity, allowing the
sockmap packet redirecting process to run on the specified CPU as much as
possible, helping users reduce the interference between the sockmap redirecting
background thread and other threads.

Returns 0 on success, or a negative error in case of failure.

bpf_msg_cork_bytes()
^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: c
Expand Down
15 changes: 15 additions & 0 deletions include/linux/skmsg.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#define MAX_MSG_FRAGS MAX_SKB_FRAGS
#define NR_MSG_FRAG_IDS (MAX_MSG_FRAGS + 1)

#define BPF_SK_REDIR_CPU_UNSET -1

enum __sk_action {
__SK_DROP = 0,
__SK_PASS,
Expand Down Expand Up @@ -86,6 +88,7 @@ struct sk_psock {
u32 apply_bytes;
u32 cork_bytes;
u32 eval;
s32 redir_cpu;
bool redir_ingress; /* undefined if sk_redir is null */
struct sk_msg *cork;
struct sk_psock_progs progs;
Expand Down Expand Up @@ -393,6 +396,18 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
sk_error_report(sk);
}

/* Queue psock->work, honouring any CPU affinity previously set through the
 * bpf_sk_skb_set_redirect_cpu() kfunc.  When no CPU has been pinned
 * (BPF_SK_REDIR_CPU_UNSET), fall back to the scheduler's default placement.
 *
 * READ_ONCE() is required: redir_cpu may be updated concurrently from BPF
 * program context, so read it exactly once to avoid a torn or re-fetched
 * value between the test and the schedule_delayed_work_on() call.
 */
static inline void sk_psock_schedule_delayed_work(struct sk_psock *psock,
						  int delay)
{
	s32 redir_cpu = READ_ONCE(psock->redir_cpu);

	if (redir_cpu != BPF_SK_REDIR_CPU_UNSET)
		schedule_delayed_work_on(redir_cpu, &psock->work, delay);
	else
		schedule_delayed_work(&psock->work, delay);
}

struct sk_psock *sk_psock_init(struct sock *sk, int node);
void sk_psock_stop(struct sk_psock *psock);

Expand Down
3 changes: 3 additions & 0 deletions kernel/bpf/btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_LWT,
BTF_KFUNC_HOOK_NETFILTER,
BTF_KFUNC_HOOK_KPROBE,
BTF_KFUNC_HOOK_SK_MSG,
BTF_KFUNC_HOOK_MAX,
};

Expand Down Expand Up @@ -8649,6 +8650,8 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
return BTF_KFUNC_HOOK_SCHED_ACT;
case BPF_PROG_TYPE_SK_SKB:
return BTF_KFUNC_HOOK_SK_SKB;
case BPF_PROG_TYPE_SK_MSG:
return BTF_KFUNC_HOOK_SK_MSG;
case BPF_PROG_TYPE_SOCKET_FILTER:
return BTF_KFUNC_HOOK_SOCKET_FILTER;
case BPF_PROG_TYPE_LWT_OUT:
Expand Down
10 changes: 6 additions & 4 deletions net/core/skmsg.c
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ static void sk_psock_backlog(struct work_struct *work)
* other work that might be here.
*/
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
schedule_delayed_work(&psock->work, 1);
sk_psock_schedule_delayed_work(psock, 1);
goto end;
}
/* Hard errors break pipe and stop xmit. */
Expand Down Expand Up @@ -741,6 +741,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
psock->saved_destroy = prot->destroy;
psock->saved_close = prot->close;
psock->saved_write_space = sk->sk_write_space;
psock->redir_cpu = BPF_SK_REDIR_CPU_UNSET;

INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
Expand Down Expand Up @@ -939,6 +940,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
sock_drop(from->sk, skb);
return -EIO;
}
psock_other->redir_cpu = from->redir_cpu;
spin_lock_bh(&psock_other->ingress_lock);
if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
spin_unlock_bh(&psock_other->ingress_lock);
Expand All @@ -948,7 +950,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
}

skb_queue_tail(&psock_other->ingress_skb, skb);
schedule_delayed_work(&psock_other->work, 0);
sk_psock_schedule_delayed_work(psock_other, 0);
spin_unlock_bh(&psock_other->ingress_lock);
return 0;
}
Expand Down Expand Up @@ -1026,7 +1028,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_queue_tail(&psock->ingress_skb, skb);
schedule_delayed_work(&psock->work, 0);
sk_psock_schedule_delayed_work(psock, 0);
err = 0;
}
spin_unlock_bh(&psock->ingress_lock);
Expand Down Expand Up @@ -1058,7 +1060,7 @@ static void sk_psock_write_space(struct sock *sk)
psock = sk_psock(sk);
if (likely(psock)) {
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
schedule_delayed_work(&psock->work, 0);
sk_psock_schedule_delayed_work(psock, 0);
write_space = psock->saved_write_space;
}
rcu_read_unlock();
Expand Down
39 changes: 39 additions & 0 deletions net/core/sock_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,45 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};

__bpf_kfunc_start_defs();

__bpf_kfunc int bpf_sk_skb_set_redirect_cpu(struct __sk_buff *s, int redir_cpu)
{
struct sk_buff *skb = (struct sk_buff *)s;
struct sock *sk = skb->sk;
struct sk_psock *psock;

WARN_ON_ONCE(!rcu_read_lock_held());

if (!sk || redir_cpu >= num_possible_cpus())
return -EINVAL;

psock = sk_psock(sk);
if (!psock)
return -ENOENT;

psock->redir_cpu = redir_cpu;
return 0;
}

__bpf_kfunc_end_defs();

/* BTF id set exposing the sockmap kfuncs to the verifier.  Currently only
 * bpf_sk_skb_set_redirect_cpu(); registered for BPF_PROG_TYPE_SK_SKB by
 * init_sockmap_subsystem() below.
 */
BTF_KFUNCS_START(bpf_sk_sockmap_kfunc_ids)
BTF_ID_FLAGS(func, bpf_sk_skb_set_redirect_cpu)
BTF_KFUNCS_END(bpf_sk_sockmap_kfunc_ids)

static const struct btf_kfunc_id_set bpf_sk_sockmap_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &bpf_sk_sockmap_kfunc_ids,
};

/* Register the sockmap kfunc id set with BTF so SK_SKB programs may call
 * bpf_sk_skb_set_redirect_cpu().  Run at late_initcall time so the BTF
 * infrastructure is guaranteed to be up.
 */
static int init_sockmap_subsystem(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SK_SKB, &bpf_sk_sockmap_kfunc_set);
}

late_initcall(init_sockmap_subsystem);

struct sock_map_seq_info {
struct bpf_map *map;
struct sock *sk;
Expand Down
35 changes: 32 additions & 3 deletions tools/testing/selftests/bpf/benchs/bench_sockmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ enum SOCKMAP_ARG_FLAG {
ARG_FW_TX_VERDICT_INGRESS,
ARG_FW_TX_VERDICT_EGRESS,
ARG_CTL_RX_STRP,
ARG_CTL_CPU_AFFINITY,
ARG_CTL_NO_VERIFY,
ARG_CONSUMER_DELAY_TIME,
ARG_PRODUCER_DURATION,
};
Expand Down Expand Up @@ -109,6 +111,8 @@ static struct socmap_ctx {
int delay_consumer;
int prod_run_time;
int strp_size;
int cpu_affinity;
int skip_verify;
} ctx = {
.prod_send = 0,
.user_read = 0,
Expand All @@ -118,6 +122,8 @@ static struct socmap_ctx {
.delay_consumer = 0,
.prod_run_time = 0,
.strp_size = 0,
.cpu_affinity = 0,
.skip_verify = 0,
};

static void bench_sockmap_prog_destroy(void)
Expand Down Expand Up @@ -235,11 +241,18 @@ static int create_sockets(void)
/* Sanity-check the benchmark's command-line configuration; exits on error.
 * Requires exactly '-c 2 -p 1 -a', and - when --cpu-affinity is requested -
 * at least 4 online CPUs, since the affinity mode pins the backlog worker
 * to CPU 3.
 */
static void validate(void)
{
	if (env.consumer_cnt != 2 || env.producer_cnt != 1 ||
	    !env.affinity) {
		fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary\n");
		goto err;
	}

	/* Bug fix: the condition was inverted ("!ctx.cpu_affinity"), which
	 * demanded 4 CPUs in the *non*-affinity case and never checked the
	 * affinity case it was written for.
	 */
	if (ctx.cpu_affinity && env.nr_cpus < 4) {
		fprintf(stderr, "4 CPUs are needed to test cpu-affinity\n");
		goto err;
	}

	return;
err:
	/* Each failure path already printed its own diagnostic; do not
	 * repeat the unrelated "-c 2 -p 1 -a" message here.
	 */
	exit(1);
}

Expand Down Expand Up @@ -327,6 +340,9 @@ static void setup(void)
exit(1);
}

if (ctx.cpu_affinity)
ctx.skel->data->redir_cpu = 3;

if (create_sockets()) {
fprintf(stderr, "create_net_mode error\n");
goto err;
Expand Down Expand Up @@ -367,9 +383,12 @@ static void measure(struct bench_res *res)

static void verify_data(int *check_pos, char *buf, int rcv)
{
if (ctx.skip_verify)
return;

for (int i = 0 ; i < rcv; i++) {
if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
fprintf(stderr, "verify data fail");
fprintf(stderr, "verify data fail\n");
exit(1);
}
(*check_pos)++;
Expand Down Expand Up @@ -553,6 +572,10 @@ static const struct argp_option opts[] = {
"delay consumer start"},
{ "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
"producer duration"},
{ "cpu-affinity", ARG_CTL_CPU_AFFINITY, NULL, 0,
"set cpu-affinity for sockmap backlog thread"},
{ "no-verify", ARG_CTL_NO_VERIFY, NULL, 0,
"skip data validation for performance enhancements"},
{},
};

Expand All @@ -571,6 +594,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case ARG_CTL_RX_STRP:
ctx.strp_size = strtol(arg, NULL, 10);
break;
case ARG_CTL_CPU_AFFINITY:
ctx.cpu_affinity = 1;
break;
case ARG_CTL_NO_VERIFY:
ctx.skip_verify = 1;
break;
default:
return ARGP_ERR_UNKNOWN;
}
Expand Down
6 changes: 6 additions & 0 deletions tools/testing/selftests/bpf/bpf_kfuncs.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,10 @@ extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
const struct bpf_dynptr *value_p, int flags) __ksym __weak;
extern int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) __ksym __weak;

/* Description
 *	Set the CPU on which the sockmap redirect backlog work for the
 *	socket owning *skb* will be scheduled. Pass -1 to clear the
 *	affinity. Available to BPF_PROG_TYPE_SK_SKB programs.
 * Returns
 *	0 on success, or a negative error in case of failure.
 */
extern int bpf_sk_skb_set_redirect_cpu(struct __sk_buff *skb, int redir_cpu) __ksym;
#endif
7 changes: 7 additions & 0 deletions tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <stdbool.h>
#include "bpf_kfuncs.h"

long process_byte = 0;
int verdict_dir = 0;
int dropped = 0;
int pkt_size = 0;
int redir_cpu = -1;

struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
Expand All @@ -33,6 +37,9 @@ int prog_skb_verdict(struct __sk_buff *skb)
int one = 1;
int ret = bpf_sk_redirect_map(skb, &sock_map_rx, one, verdict_dir);

if (redir_cpu != -1)
bpf_sk_skb_set_redirect_cpu(skb, redir_cpu);

if (ret == SK_DROP)
dropped++;
__sync_fetch_and_add(&process_byte, skb->len);
Expand Down
Loading