Skip to content

Commit 5fe3576

Browse files
committed
add C++ bpf_iter support
bpf iterator is introduced in linux kernel 5.8. https://lore.kernel.org/bpf/[email protected]/ In 5.8, iterator support for task, task_file, bpf_map, netlink_sock and ipv6_route. In 5.9, tcp, udp and hash/array/sk_local_storage map iterators are implemented. This patch added necessary interface to support bpf_iter in bcc. A few C++ APIs are added to bcc. Two bpf_iter examples, task and sk_local_storage_map, are added to illustrate how bpf iterator can be implemented. Python interface can be added later if there is a need. Signed-off-by: Yonghong Song <[email protected]>
1 parent d0e1c93 commit 5fe3576

File tree

7 files changed

+379
-0
lines changed

7 files changed

+379
-0
lines changed

docs/reference_guide.md

+24
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ This guide is incomplete. If something feels missing, check the bcc and kernel s
1919
- [9. kfuncs](#9-kfuncs)
2020
- [10. kretfuncs](#10-kretfuncs)
2121
- [11. lsm probes](#11-lsm-probes)
22+
- [12. bpf iterators](#12-bpf-iterators)
2223
- [Data](#data)
2324
- [1. bpf_probe_read_kernel()](#1-bpf_probe_read_kernel)
2425
- [2. bpf_probe_read_kernel_str()](#2-bpf_probe_read_kernel_str)
@@ -423,6 +424,29 @@ LSM probes require at least a 5.7+ kernel with the following configuation option
423424
Examples in situ:
424425
[search /tests](https://github.com/iovisor/bcc/search?q=LSM_PROBE+path%3Atests&type=Code)
425426
427+
### 12. BPF ITERATORS
428+
429+
Syntax: BPF_ITER(target)
430+
431+
This is a macro to define a program signature for a bpf iterator program. The argument *target* specifies what to iterate for the program.
432+
433+
Currently, the kernel does not have an interface to discover which targets are supported. A good place to find what is supported is [tools/testing/selftests/bpf/prog_tests/bpf_iter.c](https://github.com/torvalds/linux/blob/master/tools/testing/selftests/bpf/prog_tests/bpf_iter.c), and some sample bpf iter programs are in [tools/testing/selftests/bpf/progs](https://github.com/torvalds/linux/tree/master/tools/testing/selftests/bpf/progs) with the file name prefix *bpf_iter*.
434+
435+
The following example defines a program for target *task*, which traverses all tasks in the kernel.
436+
```C
437+
BPF_ITER(task)
438+
{
439+
struct seq_file *seq = ctx->meta->seq;
440+
struct task_struct *task = ctx->task;
441+
442+
if (task == (void *)0)
443+
return 0;
444+
445+
... task->pid, task->tgid, task->comm, ...
446+
return 0;
447+
}
448+
449+
BPF iterators were introduced in the 5.8 kernel for task, task_file, bpf_map, netlink_sock and ipv6_route. In 5.9, support was added for tcp/udp sockets and bpf map element (hashmap, arraymap and sk_local_storage_map) traversal.
426450
427451
## Data
428452

examples/cpp/CMakeLists.txt

+6
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ target_link_libraries(UseExternalMap bcc-static)
3838
add_executable(CGroupTest CGroupTest.cc)
3939
target_link_libraries(CGroupTest bcc-static)
4040

41+
add_executable(TaskIterator TaskIterator.cc)
42+
target_link_libraries(TaskIterator bcc-static)
43+
44+
add_executable(SkLocalStorageIterator SkLocalStorageIterator.cc)
45+
target_link_libraries(SkLocalStorageIterator bcc-static)
46+
4147
if(INSTALL_CPP_EXAMPLES)
4248
install (TARGETS HelloWorld DESTINATION share/bcc/examples/cpp)
4349
install (TARGETS CPUDistribution DESTINATION share/bcc/examples/cpp)
+183
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
/*
2+
* Copyright (c) Facebook, Inc.
3+
* Licensed under the Apache License, Version 2.0 (the "License")
4+
*
5+
* Usage:
6+
* ./SkLocalStorageIterator
7+
*
8+
* BPF socket local storage map iterator support was added in 5.9.
9+
* But since it takes locks during iterating, it may have performance
10+
* implication if in parallel some other bpf program or user space
11+
* is doing map update/delete for sockets in the same bucket. The issue
12+
* is fixed in 5.10 with the following patch which uses rcu lock instead:
13+
* https://lore.kernel.org/bpf/[email protected]
14+
*
15+
* This example shows how to dump local storage data from all sockets
16+
* associated with one socket local storage map.
17+
* Example output looks like this:
18+
* family prot val
19+
* 2 17 20
20+
* 2 17 10
21+
*/
22+
23+
#include <net/if.h>
#include <sys/socket.h>
#include <unistd.h>

#include <cerrno>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

#include "bcc_version.h"
#include "BPF.h"
31+
32+
// BPF C source handed to ebpf::BPF::init() by main() below. It declares the
// iterator context struct bpf_iter__bpf_sk_storage_map, a socket-local-storage
// map named sk_data_map holding one __u64 per socket, and a BPF_ITER program
// that, for each (sk, value) pair with both pointers non-null, writes one
// struct info_t {family, protocol, val} record through bpf_seq_write().
// The text is consumed at runtime, so its contents must not be edited casually.
const std::string BPF_PROGRAM = R"(
33+
34+
#include <linux/bpf.h>
35+
#include <linux/seq_file.h>
36+
#include <net/sock.h>
37+
38+
/* the structure is defined in .c file, so explicitly define
39+
* the structure here.
40+
*/
41+
struct bpf_iter__bpf_sk_storage_map {
42+
union {
43+
struct bpf_iter_meta *meta;
44+
};
45+
union {
46+
struct bpf_map *map;
47+
};
48+
union {
49+
struct sock *sk;
50+
};
51+
union {
52+
void *value;
53+
};
54+
};
55+
56+
BPF_SK_STORAGE(sk_data_map, __u64);
57+
58+
struct info_t {
59+
__u32 family;
60+
__u32 protocol;
61+
__u64 val;
62+
};
63+
64+
BPF_ITER(bpf_sk_storage_map) {
65+
struct seq_file *seq = ctx->meta->seq;
66+
struct sock *sk = ctx->sk;
67+
__u64 *val = ctx->value;
68+
struct info_t info = {};
69+
70+
if (sk == (void *)0 || val == (void *)0)
71+
return 0;
72+
73+
info.family = sk->sk_family;
74+
info.protocol = sk->sk_protocol;
75+
info.val = *val;
76+
bpf_seq_write(seq, &info, sizeof(info));
77+
78+
return 0;
79+
}
80+
)";
81+
82+
// User-space view of one record emitted by the BPF iterator program.
// The field order and widths follow the BPF-side `struct info_t`
// (__u32, __u32, __u64) so the bytes read from the iterator fd can be
// interpreted directly as an array of this struct.
struct info_t {
  unsigned int family;     // copied from sk->sk_family by the BPF program
  unsigned int protocol;   // copied from sk->sk_protocol by the BPF program
  unsigned long long val;  // value stored for the socket in sk_data_map
};
87+
88+
int main() {
89+
ebpf::BPF bpf;
90+
auto res = bpf.init(BPF_PROGRAM);
91+
if (res.code() != 0) {
92+
std::cerr << res.msg() << std::endl;
93+
return 1;
94+
}
95+
96+
// create two sockets
97+
int sockfd1 = socket(AF_INET, SOCK_DGRAM, 0);
98+
if (sockfd1 < 0) {
99+
std::cerr << "socket1 create failure: " << sockfd1 << std::endl;
100+
return 1;
101+
}
102+
103+
int sockfd2 = socket(AF_INET, SOCK_DGRAM, 0);
104+
if (sockfd2 < 0) {
105+
std::cerr << "socket2 create failure: " << sockfd2 << std::endl;
106+
close(sockfd1);
107+
return 1;
108+
}
109+
110+
unsigned long long v1 = 10, v2 = 20;
111+
auto sk_table = bpf.get_sk_storage_table<unsigned long long>("sk_data_map");
112+
113+
res = sk_table.update_value(sockfd1, v1);
114+
if (res.code() != 0) {
115+
std::cerr << "sk_data_map sockfd1 update failure: " << res.msg() << std::endl;
116+
close(sockfd2);
117+
close(sockfd1);
118+
return 1;
119+
}
120+
121+
res = sk_table.update_value(sockfd2, v2);
122+
if (res.code() != 0) {
123+
std::cerr << "sk_data_map sockfd2 update failure: " << res.msg() << std::endl;
124+
close(sockfd2);
125+
close(sockfd1);
126+
return 1;
127+
}
128+
129+
int prog_fd;
130+
res = bpf.load_func("bpf_iter__bpf_sk_storage_map", BPF_PROG_TYPE_TRACING, prog_fd);
131+
if (res.code() != 0) {
132+
std::cerr << res.msg() << std::endl;
133+
return 1;
134+
}
135+
136+
union bpf_iter_link_info link_info = {};
137+
link_info.map.map_fd = sk_table.get_fd();
138+
int link_fd = bcc_iter_attach(prog_fd, &link_info, sizeof(union bpf_iter_link_info));
139+
if (link_fd < 0) {
140+
std::cerr << "bcc_iter_attach failed: " << link_fd << std::endl;
141+
close(sockfd2);
142+
close(sockfd1);
143+
return 1;
144+
}
145+
146+
int iter_fd = bcc_iter_create(link_fd);
147+
if (iter_fd < 0) {
148+
std::cerr << "bcc_iter_create failed: " << iter_fd << std::endl;
149+
close(link_fd);
150+
close(sockfd2);
151+
close(sockfd1);
152+
return 1;
153+
}
154+
155+
// Header.
156+
printf("family\tprot\tval\n");
157+
158+
struct info_t info[20];
159+
int len, leftover = 0, info_size = 20 * sizeof(struct info_t);
160+
while ((len = read(iter_fd, (char *)info + leftover, info_size - leftover))) {
161+
if (len < 0) {
162+
if (len == -EAGAIN)
163+
continue;
164+
std::cerr << "read failed: " << len << std::endl;
165+
break;
166+
}
167+
168+
int num_info = len / sizeof(struct info_t);
169+
for (int i = 0; i < num_info; i++) {
170+
printf("%d\t%d\t%lld\n", info[i].family, info[i].protocol, info[i].val);
171+
}
172+
173+
leftover = len % sizeof(struct info_t);
174+
if (num_info > 0)
175+
memcpy(info, (void *)&info[num_info], leftover);
176+
}
177+
178+
close(iter_fd);
179+
close(link_fd);
180+
close(sockfd2);
181+
close(sockfd1);
182+
return 0;
183+
}

examples/cpp/TaskIterator.cc

+141
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*
2+
* Copyright (c) Facebook, Inc.
3+
* Licensed under the Apache License, Version 2.0 (the "License")
4+
*
5+
* Usage:
6+
* ./TaskIterator
7+
*
8+
* BPF task iterator is available since linux 5.8.
9+
* This example shows how to dump all threads in the system with
10+
* bpf iterator. Example output looks like this:
11+
* tid comm
12+
* 1 systemd
13+
* 2 kthreadd
14+
* 3 rcu_gp
15+
* 4 rcu_par_gp
16+
* 6 kworker/0:0H
17+
* ...
18+
* 2613386 sleep
19+
* 2613474 GetCountersCPU6
20+
* 2613587 GetCountersCPU7
21+
* 2613621 CPUThreadPool69
22+
* 2613906 GetCountersCPU5
23+
* 2614140 GetCountersCPU2
24+
* 2614193 CfgrExtension56
25+
* 2614449 ruby-timer-thr
26+
* 2614529 chef-client
27+
* 2615122 systemd-hostnam
28+
* ...
29+
* 2608477 sudo
30+
* 2608478 TaskIterator
31+
*/
32+
33+
#include <unistd.h>

#include <cerrno>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

#include "bcc_version.h"
#include "BPF.h"
40+
41+
// BPF C source handed to ebpf::BPF::init() by main() below. It declares the
// iterator context struct bpf_iter__task and a BPF_ITER(task) program that,
// for each non-null task, writes one struct info_t {tid, comm} record
// through bpf_seq_write(). The text is consumed at runtime, so its contents
// must not be edited casually.
const std::string BPF_PROGRAM = R"(
42+
#include <linux/bpf.h>
43+
#include <linux/seq_file.h>
44+
#include <linux/sched.h>
45+
46+
/* the structure is defined in .c file, so explicitly define
47+
* the structure here.
48+
*/
49+
struct bpf_iter__task {
50+
union {
51+
struct bpf_iter_meta *meta;
52+
};
53+
union {
54+
struct task_struct *task;
55+
};
56+
};
57+
58+
struct info_t {
59+
int tid;
60+
char comm[TASK_COMM_LEN];
61+
};
62+
63+
BPF_ITER(task) {
64+
struct seq_file *seq = ctx->meta->seq;
65+
struct task_struct *task = ctx->task;
66+
struct info_t info = {};
67+
68+
if (task == (void *)0)
69+
return 0;
70+
71+
info.tid = task->pid;
72+
__builtin_memcpy(&info.comm, task->comm, sizeof(info.comm));
73+
bpf_seq_write(seq, &info, sizeof(info));
74+
75+
return 0;
76+
}
77+
)";
78+
79+
// Length of a task's command-name buffer; mirrors the constant of the same
// name from linux/sched.h, which the BPF-side program includes.
#define TASK_COMM_LEN 16

// User-space view of one record emitted by the BPF task iterator program.
// Field order and sizes follow the BPF-side `struct info_t` so the bytes
// read from the iterator fd can be interpreted as an array of this struct.
struct info_t {
  int tid;                   // filled from task->pid by the BPF program
  char comm[TASK_COMM_LEN];  // filled from task->comm by the BPF program
};
86+
87+
int main() {
88+
ebpf::BPF bpf;
89+
auto res = bpf.init(BPF_PROGRAM);
90+
if (res.code() != 0) {
91+
std::cerr << res.msg() << std::endl;
92+
return 1;
93+
}
94+
95+
int prog_fd;
96+
res = bpf.load_func("bpf_iter__task", BPF_PROG_TYPE_TRACING, prog_fd);
97+
if (res.code() != 0) {
98+
std::cerr << res.msg() << std::endl;
99+
return 1;
100+
}
101+
102+
int link_fd = bcc_iter_attach(prog_fd, NULL, 0);
103+
if (link_fd < 0) {
104+
std::cerr << "bcc_iter_attach failed: " << link_fd << std::endl;
105+
return 1;
106+
}
107+
108+
int iter_fd = bcc_iter_create(link_fd);
109+
if (iter_fd < 0) {
110+
std::cerr << "bcc_iter_create failed: " << iter_fd << std::endl;
111+
close(link_fd);
112+
return 1;
113+
}
114+
115+
// Header.
116+
printf("tid\tcomm\n");
117+
118+
struct info_t info[20];
119+
int len, leftover = 0, info_size = 20 * sizeof(struct info_t);
120+
while ((len = read(iter_fd, (char *)info + leftover, info_size - leftover))) {
121+
if (len < 0) {
122+
if (len == -EAGAIN)
123+
continue;
124+
std::cerr << "read failed: " << len << std::endl;
125+
break;
126+
}
127+
128+
int num_info = len / sizeof(struct info_t);
129+
for (int i = 0; i < num_info; i++) {
130+
printf("%d\t%s\n", info[i].tid, info[i].comm);
131+
}
132+
133+
leftover = len % sizeof(struct info_t);
134+
if (num_info > 0)
135+
memcpy(info, (void *)&info[num_info], leftover);
136+
}
137+
138+
close(iter_fd);
139+
close(link_fd);
140+
return 0;
141+
}

src/cc/export/helpers.h

+3
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,9 @@ static int ____##name(unsigned long long *ctx, ##args)
11741174
#define LSM_PROBE(event, args...) \
11751175
BPF_PROG(lsm__ ## event, args)
11761176

1177+
/*
 * BPF_ITER(target): emits the function signature
 *   int bpf_iter__<target>(struct bpf_iter__<target> *ctx)
 * for a bpf iterator program. The function body is supplied by the user
 * right after the macro invocation and receives the context as `ctx`.
 * struct bpf_iter__<target> is not defined here; it must already be
 * declared where the macro is used.
 */
#define BPF_ITER(target) \
1178+
int bpf_iter__ ## target (struct bpf_iter__ ## target *ctx)
1179+
11771180
#define TP_DATA_LOC_READ_CONST(dst, field, length) \
11781181
do { \
11791182
unsigned short __offset = args->data_loc_##field & 0xFFFF; \

0 commit comments

Comments
 (0)