Skip to content

Commit c421970

Browse files
ethercflow authored and yonghong-song committed
libbpf-tools: convert BCC drsnoop to BPF CO-RE version
Signed-off-by: Wenbo Zhang <[email protected]>
1 parent 593377d commit c421970

11 files changed

+620
-8
lines changed

libbpf-tools/Makefile

+5-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
88
INCLUDES := -I$(OUTPUT)
99
CFLAGS := -g -Wall
1010

11-
APPS = runqslower
11+
APPS = drsnoop runqslower
1212

1313
.PHONY: all
1414
all: $(APPS)
@@ -31,11 +31,13 @@ $(OUTPUT) $(OUTPUT)/libbpf:
3131
$(call msg,MKDIR,$@)
3232
$(Q)mkdir -p $@
3333

34-
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
34+
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) $(OUTPUT)/trace_helpers.o | $(OUTPUT)
3535
$(call msg,BINARY,$@)
3636
$(Q)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
3737

38-
$(OUTPUT)/%.o: %.c $(OUTPUT)/%.skel.h $(wildcard %.h) | $(OUTPUT)
38+
$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h
39+
40+
$(OUTPUT)/%.o: %.c $(wildcard %.h) | $(OUTPUT)
3941
$(call msg,CC,$@)
4042
$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
4143

libbpf-tools/drsnoop.bpf.c

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
// Copyright (c) 2020 Wenbo Zhang
3+
#include "vmlinux.h"
4+
#include <bpf/bpf_helpers.h>
5+
#include "drsnoop.h"
6+
7+
#define BPF_F_INDEX_MASK 0xffffffffULL
8+
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
9+
10+
const volatile pid_t targ_pid = 0;
11+
const volatile pid_t targ_tgid = 0;
12+
const volatile __u64 vm_zone_stat_kaddr = 0;
13+
14+
struct piddata {
15+
u64 ts;
16+
u64 nr_free_pages;
17+
};
18+
19+
struct {
20+
__uint(type, BPF_MAP_TYPE_HASH);
21+
__uint(max_entries, 8192);
22+
__type(key, u32);
23+
__type(value, struct piddata);
24+
} start SEC(".maps");
25+
26+
struct {
27+
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
28+
__uint(key_size, sizeof(u32));
29+
__uint(value_size, sizeof(u32));
30+
} events SEC(".maps");
31+
32+
/*
 * Direct reclaim begins: remember the start time (and, when the address of
 * vm_zone_stat was supplied, the current free-page counter) for this thread.
 * Threads rejected by the tgid/pid filters are ignored. Always returns 0.
 */
SEC("tp_btf/mm_vmscan_direct_reclaim_begin")
int handle__mm_vmscan_direct_reclaim_begin(u64 *ctx)
{
	u64 *zone_stat = (u64 *)vm_zone_stat_kaddr;
	u64 pid_tgid = bpf_get_current_pid_tgid();
	struct piddata entry = {};
	u32 tgid = pid_tgid >> 32;
	u32 tid = pid_tgid;

	if ((targ_tgid && targ_tgid != tgid) || (targ_pid && targ_pid != tid))
		return 0;

	entry.ts = bpf_ktime_get_ns();
	/* Free pages are only sampled when user space resolved vm_zone_stat. */
	if (zone_stat)
		bpf_probe_read(&entry.nr_free_pages, sizeof(*zone_stat),
			       &zone_stat[NR_FREE_PAGES]);

	bpf_map_update_elem(&start, &tid, &entry, 0);
	return 0;
}
56+
57+
/*
 * Direct reclaim ends: pair this call with the entry stored by the begin
 * handler, emit one struct event on the current CPU's perf buffer and drop
 * the entry. Does nothing when the thread is filtered out or no begin
 * record exists. Always returns 0.
 */
SEC("tp_btf/mm_vmscan_direct_reclaim_end")
int handle__mm_vmscan_direct_reclaim_end(u64 *ctx)
{
	u64 pid_tgid = bpf_get_current_pid_tgid();
	u32 tgid = pid_tgid >> 32;
	u32 tid = pid_tgid;
	struct piddata *entry;
	struct event ev = {};

	if ((targ_tgid && targ_tgid != tgid) || (targ_pid && targ_pid != tid))
		return 0;

	entry = bpf_map_lookup_elem(&start, &tid);
	if (!entry)
		return 0;	/* missed entry */

	ev.pid = tid;
	/* TP_PROTO(unsigned long nr_reclaimed) */
	ev.nr_reclaimed = ctx[0];
	ev.delta_ns = bpf_ktime_get_ns() - entry->ts;
	ev.nr_free_pages = entry->nr_free_pages;
	bpf_get_current_comm(&ev.task, TASK_COMM_LEN);

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &ev, sizeof(ev));

	bpf_map_delete_elem(&start, &tid);
	return 0;
}
94+
95+
/* License string placed in the "license" section of the BPF object. */
char LICENSE[] SEC("license") = "GPL";

libbpf-tools/drsnoop.c

+240
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2+
// Copyright (c) 2020 Wenbo Zhang
3+
//
4+
// Based on drsnoop(8) from BCC by Wenbo Zhang.
5+
// 28-Feb-2020 Wenbo Zhang Created this.
6+
#include <argp.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <unistd.h>
#include <time.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "drsnoop.h"
#include "drsnoop.skel.h"
#include "trace_helpers.h"
18+
19+
/* Perf buffer sizing and polling interval used by main(). */
enum {
	PERF_BUFFER_PAGES = 16,		/* page count passed to perf_buffer__new() */
	PERF_POLL_TIMEOUT_MS = 100,	/* timeout passed to perf_buffer__poll() */
};
21+
22+
/* Command-line settings; everything is zero/false until parse_arg() runs. */
static struct env {
	pid_t pid;		/* -p: copied into the BPF targ_tgid filter */
	pid_t tid;		/* -t: copied into the BPF targ_pid filter */
	time_t duration;	/* -d: seconds to trace, 0 = no limit */
	bool extended;		/* -e: also print the FREE(KB) column */
	bool verbose;		/* -v: let libbpf debug messages through */
} env = { 0 };
29+
30+
/* Version string exported for glibc argp (per the argp docs, backs --version). */
const char *argp_program_version = "drsnoop 0.1";
31+
/*
 * Bug-report address exported for glibc argp's help output.
 * NOTE(review): the literal below reads like an e-mail redaction left by
 * whatever extracted this file -- confirm and restore the real address.
 */
const char *argp_program_bug_address = "<[email protected]>";
32+
/*
 * Program description handed to argp through the .doc field in main():
 * one-line summary, usage synopsis and examples.
 */
const char argp_program_doc[] =
"Trace direct reclaim latency.\n"
"\n"
"USAGE: drsnoop [--help] [-p PID] [-t TID] [-d DURATION] [-e] [-v]\n"
"\n"
"EXAMPLES:\n"
" drsnoop # trace all direct reclaim events\n"
" drsnoop -p 123 # trace pid 123\n"
" drsnoop -t 123 # trace tid 123 (use for threads only)\n"
" drsnoop -d 10 # trace for 10 seconds only\n"
" drsnoop -e # trace all direct reclaim events with extended fields\n";
43+
44+
/* Option table for argp; the all-zero entry terminates the list. */
static const struct argp_option opts[] = {
	{ .name = "duration", .key = 'd', .arg = "DURATION",
	  .doc = "Total duration of trace in seconds" },
	{ .name = "extended", .key = 'e',
	  .doc = "Extended fields output" },
	{ .name = "pid", .key = 'p', .arg = "PID",
	  .doc = "Process PID to trace" },
	{ .name = "tid", .key = 't', .arg = "TID",
	  .doc = "Thread TID to trace" },
	{ .name = "verbose", .key = 'v',
	  .doc = "Verbose debug output" },
	{ 0 },
};
52+
53+
/* Page size in bytes from sysconf(); main() only sets it when -e is given. */
static int page_size;
54+
55+
static error_t parse_arg(int key, char *arg, struct argp_state *state)
56+
{
57+
time_t duration;
58+
int pid;
59+
60+
switch (key) {
61+
case 'v':
62+
env.verbose = true;
63+
break;
64+
case 'd':
65+
errno = 0;
66+
duration = strtol(arg, NULL, 10);
67+
if (errno || duration <= 0) {
68+
fprintf(stderr, "invalid DURATION: %s\n", arg);
69+
argp_usage(state);
70+
}
71+
env.duration = duration;
72+
break;
73+
case 'e':
74+
env.extended = true;
75+
break;
76+
case 'p':
77+
errno = 0;
78+
pid = strtol(arg, NULL, 10);
79+
if (errno || pid <= 0) {
80+
fprintf(stderr, "invalid PID: %s\n", arg);
81+
argp_usage(state);
82+
}
83+
env.pid = pid;
84+
break;
85+
case 't':
86+
errno = 0;
87+
pid = strtol(arg, NULL, 10);
88+
if (errno || pid <= 0) {
89+
fprintf(stderr, "invalid TID: %s\n", arg);
90+
argp_usage(state);
91+
}
92+
env.tid = pid;
93+
break;
94+
default:
95+
return ARGP_ERR_UNKNOWN;
96+
}
97+
return 0;
98+
}
99+
100+
int libbpf_print_fn(enum libbpf_print_level level,
101+
const char *format, va_list args)
102+
{
103+
if (level == LIBBPF_DEBUG && !env.verbose)
104+
return 0;
105+
return vfprintf(stderr, format, args);
106+
}
107+
108+
/*
 * Raise both the soft and hard RLIMIT_MEMLOCK limits to RLIM_INFINITY.
 * Returns setrlimit()'s result unchanged.
 */
static int bump_memlock_rlimit(void)
{
	struct rlimit unlimited = { 0 };

	unlimited.rlim_cur = RLIM_INFINITY;
	unlimited.rlim_max = RLIM_INFINITY;

	return setrlimit(RLIMIT_MEMLOCK, &unlimited);
}
117+
118+
/*
 * Perf buffer sample callback: print one line per direct-reclaim event.
 *
 * ctx     - unused
 * cpu     - unused
 * data    - sample payload, interpreted as a struct event
 * data_sz - unused
 *
 * Columns: wall-clock time, command, thread id, latency in ms, pages
 * reclaimed and, in extended mode, free memory in KB at reclaim start.
 */
void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
	const struct event *e = data;
	char ts[32] = "??:??:??";	/* shown if the local time is unavailable */
	struct tm *tm;
	time_t now;

	time(&now);
	tm = localtime(&now);
	/* localtime() may return NULL; never hand that to strftime() */
	if (tm)
		strftime(ts, sizeof(ts), "%H:%M:%S", tm);

	/* cast the __u64 fields so the arguments match the %llu conversions */
	printf("%-8s %-16s %-6d %8.3f %5llu",
	       ts, e->task, e->pid, (double)e->delta_ns / 1000000,
	       (unsigned long long)e->nr_reclaimed);
	if (env.extended)
		printf(" %8llu",
		       (unsigned long long)e->nr_free_pages * page_size / 1024);
	printf("\n");
}
135+
136+
/*
 * Perf buffer lost-sample callback: report on stderr how many events
 * were dropped and on which CPU. ctx is unused.
 */
void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
{
	FILE *out = stderr;

	fprintf(out, "lost %llu events on CPU #%d\n", lost_cnt, cpu);
}
140+
141+
int main(int argc, char **argv)
142+
{
143+
static const struct argp argp = {
144+
.options = opts,
145+
.parser = parse_arg,
146+
.doc = argp_program_doc,
147+
};
148+
struct perf_buffer_opts pb_opts;
149+
struct perf_buffer *pb = NULL;
150+
struct ksyms *ksyms = NULL;
151+
const struct ksym *ksym;
152+
struct drsnoop_bpf *obj;
153+
time_t start_time;
154+
int err;
155+
156+
err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
157+
if (err)
158+
return err;
159+
160+
libbpf_set_print(libbpf_print_fn);
161+
162+
err = bump_memlock_rlimit();
163+
if (err) {
164+
fprintf(stderr, "failed to increase rlimit: %d\n", err);
165+
return 1;
166+
}
167+
168+
obj = drsnoop_bpf__open();
169+
if (!obj) {
170+
fprintf(stderr, "failed to open and/or load BPF ojbect\n");
171+
return 1;
172+
}
173+
174+
/* initialize global data (filtering options) */
175+
obj->rodata->targ_tgid = env.pid;
176+
obj->rodata->targ_pid = env.tid;
177+
if (env.extended) {
178+
ksyms = ksyms__load();
179+
if (!ksyms) {
180+
fprintf(stderr, "failed to load kallsyms\n");
181+
goto cleanup;
182+
}
183+
ksym = ksyms__get_symbol(ksyms, "vm_zone_stat");
184+
if (!ksym) {
185+
fprintf(stderr, "failed to get vm_zone_stat's addr\n");
186+
goto cleanup;
187+
}
188+
obj->rodata->vm_zone_stat_kaddr = ksym->addr;
189+
page_size = sysconf(_SC_PAGESIZE);
190+
}
191+
192+
err = drsnoop_bpf__load(obj);
193+
if (err) {
194+
fprintf(stderr, "failed to load BPF object: %d\n", err);
195+
goto cleanup;
196+
}
197+
198+
err = drsnoop_bpf__attach(obj);
199+
if (err) {
200+
fprintf(stderr, "failed to attach BPF programs\n");
201+
goto cleanup;
202+
}
203+
204+
printf("Tracing direct reclaim events");
205+
if (env.duration)
206+
printf(" for %ld secs.\n", env.duration);
207+
else
208+
printf("... Hit Ctrl-C to end.\n");
209+
printf("%-8s %-16s %-6s %8s %5s",
210+
"TIME", "COMM", "TID", "LAT(ms)", "PAGES");
211+
if (env.extended)
212+
printf(" %8s", "FREE(KB)");
213+
printf("\n");
214+
215+
pb_opts.sample_cb = handle_event;
216+
pb_opts.lost_cb = handle_lost_events;
217+
pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES,
218+
&pb_opts);
219+
err = libbpf_get_error(pb);
220+
if (err) {
221+
pb = NULL;
222+
fprintf(stderr, "failed to open perf buffer: %d\n", err);
223+
goto cleanup;
224+
}
225+
226+
start_time = time(NULL);
227+
while (!env.duration || time(NULL) - start_time < env.duration) {
228+
if ((err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS)) < 0) {
229+
printf("error polling perf buffer: %d\n", err);
230+
break;
231+
}
232+
}
233+
234+
cleanup:
235+
perf_buffer__free(pb);
236+
drsnoop_bpf__destroy(obj);
237+
ksyms__free(ksyms);
238+
239+
return err != 0;
240+
}

libbpf-tools/drsnoop.h

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
2+
#ifndef __DRSNOOP_H
3+
#define __DRSNOOP_H
4+
5+
/* Size in bytes of the task[] command-name buffer in struct event. */
#define TASK_COMM_LEN 16
6+
7+
struct event {
8+
char task[TASK_COMM_LEN];
9+
__u64 delta_ns;
10+
__u64 nr_reclaimed;
11+
__u64 nr_free_pages;
12+
pid_t pid;
13+
};
14+
15+
#endif /* __DRSNOOP_H */

0 commit comments

Comments
 (0)