Skip to content

Commit 6158d30

Browse files
committed
Add support to monitor bpf programs.
BPF is a very important component of modern Linux systems, and it keeps gaining features and adoption. This commit enables atop to monitor BPF programs. The output looks like: PRC | sys 9h36m | user 11h45m | #proc 759 | #tslpu 0 | #zombie 0 | #exit 5 | CPU | sys 14% | user 18% | irq 0% | idle 7967% | wait 1% | ipc initial | CPL | avg1 0.26 | avg5 0.44 | avg15 0.48 | csw 113066e5 | intr 61268e5 | numcpu 80 | NET | lo ---- | pcki 18145e4 | pcko 18145e4 | sp 0 Mbps | si 2365 Kbps | so 2365 Kbps | BPF_PROG_ID TYPE NAME TOTAL_TIME_NS RUN_CNT AVG_TIME_NS 39 sched_cls fbflow_egress 475443 235 2023.16 175 tracepoint tracepoint__sch 89347 10 8934.70 40 sched_cls fbflow_ingress 53494 227 235.66 PID SYSCPU USRCPU VGROW RGROW RDDSK WRDSK ST EXC THR S CPUNR CPU CMD 1/382 2377 81m34s 2h17m 2.2G 176.1M 319.1M 31.4G N- - 270 S 11 1% configerator_p To build atop with BPF monitoring, pass the following option to make: ATOP_BPF_SUPPORT=1 make -j. Atop periodically enables monitoring of BPF programs by writing to /proc/sys/kernel/bpf_stats_enabled. This part is not 100% multi-process safe. Since monitoring BPF programs adds non-trivial overhead to those programs, the following options are added so that BPF programs can be monitored less often: bpfsamplerate (default 1) and bpfsampleinterval (default 1). BPF stats are enabled for bpfsampleinterval seconds every bpfsamplerate atop intervals. bpfsampleinterval must be smaller than the atop interval.
1 parent 802fb3a commit 6158d30

11 files changed

+739
-148
lines changed

Makefile

+9-3
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,19 @@ OBJMOD3 = showgeneric.o showlinux.o showsys.o showprocs.o
2828
OBJMOD4 = atopsar.o netatopif.o gpucom.o
2929
ALLMODS = $(OBJMOD0) $(OBJMOD1) $(OBJMOD2) $(OBJMOD3) $(OBJMOD4)
3030

31+
ifneq ($(ATOP_BPF_SUPPORT),)
32+
ALLMODS += photobpf.o
33+
ATOP_BPF_LDFLAGS = -lbpf
34+
CFLAGS += -DATOP_BPF_SUPPORT
35+
endif
36+
3137
VERS = $(shell ./atop -V 2>/dev/null| sed -e 's/^[^ ]* //' -e 's/ .*//')
3238

3339
all: atop atopsar atopacctd atopconvert
3440

3541
atop: atop.o $(ALLMODS) Makefile
3642
$(CC) -c version.c
37-
$(CC) atop.o $(ALLMODS) -o atop -lncurses -lz -lm -lrt $(LDFLAGS)
43+
$(CC) atop.o $(ALLMODS) -o atop -lncurses -lz -lm -lrt $(ATOP_BPF_LDFLAGS) $(LDFLAGS)
3844

3945
atopsar: atop
4046
ln -sf atop atopsar
@@ -184,7 +190,7 @@ versdate.h:
184190
./mkdate
185191

186192
atop.o: atop.h photoproc.h photosyst.h acctproc.h showgeneric.h
187-
atopsar.o: atop.h photoproc.h photosyst.h
193+
atopsar.o: atop.h photoproc.h photosyst.h
188194
rawlog.o: atop.h photoproc.h photosyst.h showgeneric.h
189195
various.o: atop.h acctproc.h
190196
ifprop.o: atop.h photosyst.h ifprop.h
@@ -197,7 +203,7 @@ photoproc.o: atop.h photoproc.h
197203
photosyst.o: atop.h photosyst.h
198204
showgeneric.o: atop.h photoproc.h photosyst.h showgeneric.h showlinux.h
199205
showlinux.o: atop.h photoproc.h photosyst.h showgeneric.h showlinux.h
200-
showsys.o: atop.h photoproc.h photosyst.h showgeneric.h
206+
showsys.o: atop.h photoproc.h photosyst.h showgeneric.h
201207
showprocs.o: atop.h photoproc.h photosyst.h showgeneric.h showlinux.h
202208
version.o: version.c version.h versdate.h
203209
gpucom.o: atop.h photoproc.h photosyst.h

atop.c

+68-26
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
/*
22
** ATOP - System & Process Monitor
33
**
4-
** The program 'atop' offers the possibility to view the activity of
4+
** The program 'atop' offers the possibility to view the activity of
55
** the system on system-level as well as process-level.
66
**
77
** This source-file contains the main-function, which verifies the
8-
** calling-parameters and takes care of initialization.
8+
** calling-parameters and takes care of initialization.
99
** The engine-function drives the main sample-loop in which after the
1010
** indicated interval-time a snapshot is taken of the system-level and
1111
** process-level counters and the deviations are calculated and
@@ -35,7 +35,7 @@
3535
** --------------------------------------------------------------------------
3636
**
3737
** After initialization, the main-function calls the ENGINE.
38-
** For every cycle (so after another interval) the ENGINE calls various
38+
** For every cycle (so after another interval) the ENGINE calls various
3939
** functions as shown below:
4040
**
4141
** +---------------------------------------------------------------------+
@@ -48,15 +48,15 @@
4848
** | | ^ | ^ | ^ | ^ | | |
4949
** +---|-----|--------|-----|--------|----|--------|----|--------|----|--+
5050
** | | | | | | | | | |
51-
** +--V-----|--+ +--V-----|--+ +--V----|--+ +--V----|--+ +--V----|-+
51+
** +--V-----|--+ +--V-----|--+ +--V----|--+ +--V----|--+ +--V----|-+
5252
** | | | | | | | | | |
5353
** | photosyst | | photoproc | | acct | | deviate | | print |
5454
** | | | | |photoproc | | ...syst | | |
5555
** | | | | | | | ...proc | | |
56-
** +-----------+ +-----------+ +----------+ +----------+ +---------+
56+
** +-----------+ +-----------+ +----------+ +----------+ +---------+
5757
** ^ ^ ^ ^ |
5858
** | | | | |
59-
** | | | V V
59+
** | | | V V
6060
** ______ _________ __________ ________ _________
6161
** / \ / \ / \ / \ / \
6262
** /proc /proc accounting task screen or
@@ -84,8 +84,8 @@
8484
** When all counters have been gathered, functions are called to calculate
8585
** the difference between the current counter-values and the counter-values
8686
** of the previous cycle. These functions operate on the system-level
87-
** as well as on the task-level counters.
88-
** These differences are stored in a new structure(-table).
87+
** as well as on the task-level counters.
88+
** These differences are stored in a new structure(-table).
8989
**
9090
** - deviatsyst()
9191
** Calculates the differences between the current system-level
@@ -98,7 +98,7 @@
9898
** task-database; this "database" is implemented as a linked list
9999
** of taskinfo structures in memory (so no disk-accesses needed).
100100
** Within this linked list hash-buckets are maintained for fast searches.
101-
** The entire task-database is handled via a set of well-defined
101+
** The entire task-database is handled via a set of well-defined
102102
** functions from which the name starts with "pdb_..." (see the
103103
** source-file procdbase.c).
104104
** The processes which have been finished during the last cycle
@@ -112,7 +112,7 @@
112112
** these addresses can be modified in the main-function depending on particular
113113
** flags. In this way various representation-layers (ASCII, graphical, ...)
114114
** can be linked with 'atop'; the one to use can eventually be chosen
115-
** at runtime.
115+
** at runtime.
116116
**
117117
** $Log: atop.c,v $
118118
** Revision 1.49 2010/10/23 14:01:00 gerlof
@@ -298,6 +298,7 @@ static const char rcsid[] = "$Id: atop.c,v 1.49 2010/10/23 14:01:00 gerlof Exp $
298298
#include "showgeneric.h"
299299
#include "parseable.h"
300300
#include "gpucom.h"
301+
#include "photobpf.h"
301302

302303
#define allflags "ab:cde:fghijklmnopqrstuvwxyz1ABCDEFGHIJKL:MNOP:QRSTUVWXYZ"
303304
#define MAXFL 64 /* maximum number of command-line flags */
@@ -323,6 +324,16 @@ char usecolors = 1; /* boolean: colors for high occupation */
323324
char threadview = 0; /* boolean: show individual threads */
324325
char calcpss = 0; /* boolean: read/calculate process PSS */
325326

327+
/*
328+
** arguments for bpf stats sampling
329+
** We enable bpf stats for bpfsampleinterval seconds every bpfsamplerate
330+
** atop intervals. bpfsampleinterval must be smaller than atop interval.
331+
**
332+
** If bpfsamplerate == 0, disable sampling of bpf stats.
333+
*/
334+
unsigned int bpfsamplerate = 1;
335+
unsigned int bpfsampleinterval = 1;
336+
326337
unsigned short hertz;
327338
unsigned int pagesize;
328339
unsigned int nrgpus;
@@ -392,6 +403,9 @@ void do_almostcrit(char *, char *);
392403
void do_atopsarflags(char *, char *);
393404
void do_pacctdir(char *, char *);
394405
void do_perfevents(char *, char *);
406+
void do_bpflines(char *, char *);
407+
void do_bpfsamplerate(char *, char *);
408+
void do_bpfsampleinterval(char *, char *);
395409

396410
static struct {
397411
char *tag;
@@ -441,6 +455,9 @@ static struct {
441455
{ "atopsarflags", do_atopsarflags, 0, },
442456
{ "perfevents", do_perfevents, 0, },
443457
{ "pacctdir", do_pacctdir, 1, },
458+
{ "bpflines", do_bpflines, 0, },
459+
{ "bpfsamplerate", do_bpfsamplerate, 0, },
460+
{ "bpfsampleinterval", do_bpfsampleinterval, 0, },
444461
};
445462

446463
/*
@@ -467,6 +484,8 @@ main(int argc, char *argv[])
467484
exit(42);
468485
}
469486

487+
photo_bpf_check();
488+
470489
/*
471490
** preserve command arguments to allow restart of other version
472491
*/
@@ -498,12 +517,12 @@ main(int argc, char *argv[])
498517
if ( memcmp(p, "atopsar", 7) == 0)
499518
return atopsar(argc, argv);
500519

501-
/*
502-
** interpret command-line arguments & flags
520+
/*
521+
** interpret command-line arguments & flags
503522
*/
504523
if (argc > 1)
505524
{
506-
/*
525+
/*
507526
** gather all flags for visualization-functions
508527
**
509528
** generic flags will be handled here;
@@ -582,17 +601,17 @@ main(int argc, char *argv[])
582601
}
583602

584603
/*
585-
** get optional interval-value and optional number of samples
604+
** get optional interval-value and optional number of samples
586605
*/
587606
if (optind < argc && optind < MAXFL)
588607
{
589608
if (!numeric(argv[optind]))
590609
prusage(argv[0]);
591-
610+
592611
interval = atoi(argv[optind]);
593-
612+
594613
optind++;
595-
614+
596615
if (optind < argc)
597616
{
598617
if (!numeric(argv[optind]) )
@@ -748,6 +767,7 @@ engine(void)
748767
gpupending=0; /* boolean: request sent */
749768

750769
struct gpupidstat *gp = NULL;
770+
struct bstats *bstats = NULL;
751771

752772
/*
753773
** initialization: allocate required memory dynamically
@@ -799,6 +819,8 @@ engine(void)
799819
if (nrgpus)
800820
supportflags |= GPUSTAT;
801821

822+
if (system_support_bpf())
823+
supportflags |= BPFSTAT;
802824
/*
803825
** MAIN-LOOP:
804826
** - Wait for the requested number of seconds or for other trigger
@@ -820,11 +842,15 @@ engine(void)
820842
/*
821843
** if the limit-flag is specified:
822844
** check if the next sample is expected before midnight;
823-
** if not, stop atop now
845+
** if not, stop atop now
824846
*/
825847
if (midnightflag && (curtime+interval) > timelimit)
826848
break;
827849

850+
if ((supportflags & BPFSTAT) &&
851+
bpfsamplerate && sampcnt % bpfsamplerate == 0)
852+
bstats = get_devbstats();
853+
828854
/*
829855
** wait for alarm-signal to arrive (except first sample)
830856
** or wait for SIGUSR1/SIGUSR2
@@ -841,13 +867,13 @@ engine(void)
841867
curtime = time(0); /* seconds since 1-1-1970 */
842868

843869
/*
844-
** send request for statistics to atopgpud
870+
** send request for statistics to atopgpud
845871
*/
846872
if (nrgpus)
847873
gpupending = gpud_statrequest();
848874

849875
/*
850-
** take a snapshot of the current system-level statistics
876+
** take a snapshot of the current system-level statistics
851877
** and calculate the deviations (i.e. calculate the activity
852878
** during the last sample)
853879
*/
@@ -900,7 +926,7 @@ engine(void)
900926
curtime-pretime > 0 ? curtime-pretime : 1);
901927

902928
/*
903-
** take a snapshot of the current task-level statistics
929+
** take a snapshot of the current task-level statistics
904930
** and calculate the deviations (i.e. calculate the activity
905931
** during the last sample)
906932
**
@@ -995,10 +1021,14 @@ engine(void)
9951021
** the deviations
9961022
*/
9971023
lastcmd = (vis.show_samp)( curtime,
998-
curtime-pretime > 0 ? curtime-pretime : 1,
999-
&devtstat, devsstat,
1000-
nprocexit, noverflow, sampcnt==0);
1024+
curtime-pretime > 0 ? curtime-pretime : 1,
1025+
&devtstat, devsstat, bstats,
1026+
nprocexit, noverflow, sampcnt==0);
10011027

1028+
if (bstats) {
1029+
free(bstats->bpfall);
1030+
bstats = NULL;
1031+
}
10021032
/*
10031033
** release dynamically allocated memory
10041034
*/
@@ -1047,7 +1077,7 @@ prusage(char *myname)
10471077
printf("\t -%c show version information\n", MVERSION);
10481078
printf("\t -%c show or log all processes (i.s.o. active processes "
10491079
"only)\n", MALLPROC);
1050-
printf("\t -%c calculate proportional set size (PSS) per process\n",
1080+
printf("\t -%c calculate proportional set size (PSS) per process\n",
10511081
MCALCPSS);
10521082
printf("\t -P generate parseable output for specified label(s)\n");
10531083
printf("\t -L alternate line length (default 80) in case of "
@@ -1126,6 +1156,18 @@ do_linelength(char *name, char *val)
11261156
linelen = get_posval(name, val);
11271157
}
11281158

1159+
void
1160+
do_bpfsamplerate(char *name, char *val)
1161+
{
1162+
bpfsamplerate = get_posval(name, val);
1163+
}
1164+
1165+
void
1166+
do_bpfsampleinterval(char *name, char *val)
1167+
{
1168+
bpfsampleinterval = get_posval(name, val);
1169+
}
1170+
11291171
/*
11301172
** read RC-file and modify defaults accordingly
11311173
*/
@@ -1176,7 +1218,7 @@ readrc(char *path, int syslevel)
11761218
default:
11771219
if (tagname[0] == '#')
11781220
continue;
1179-
1221+
11801222
if (tagvalue[0] != '#')
11811223
break;
11821224

atop.h

+10-5
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ struct tstat;
4242
struct devtstat;
4343
struct sstat;
4444
struct netpertask;
45+
struct bstats;
4546

46-
/*
47+
/*
4748
** miscellaneous flags
4849
*/
4950
#define RRBOOT 0x0001
@@ -57,7 +58,7 @@ struct netpertask;
5758

5859
struct visualize {
5960
char (*show_samp) (time_t, int,
60-
struct devtstat *, struct sstat *,
61+
struct devtstat *, struct sstat *, struct bstats *,
6162
int, unsigned int, char);
6263
void (*show_error) (const char *, ...);
6364
void (*show_end) (void);
@@ -104,6 +105,9 @@ extern int netbadness;
104105
extern int pagbadness;
105106
extern int almostcrit;
106107

108+
extern int bpflines;
109+
extern unsigned int bpfsampleinterval;
110+
107111
/*
108112
** bit-values for supportflags
109113
*/
@@ -113,9 +117,10 @@ extern int almostcrit;
113117
#define NETATOPD 0x00000020
114118
#define DOCKSTAT 0x00000040
115119
#define GPUSTAT 0x00000080
120+
#define BPFSTAT 0x00000100
116121

117122
/*
118-
** in rawlog file, the four least significant bits
123+
** in rawlog file, the four least significant bits
119124
** are moved to the per-sample flags and therefor dummy
120125
** in the support flags of the general header
121126
*/
@@ -125,7 +130,7 @@ extern int almostcrit;
125130
** structure containing the start-addresses of functions for visualization
126131
*/
127132
char generic_samp (time_t, int,
128-
struct devtstat *, struct sstat *,
133+
struct devtstat *, struct sstat *, struct bstats *,
129134
int, unsigned int, char);
130135
void generic_error(const char *, ...);
131136
void generic_end (void);
@@ -166,7 +171,7 @@ int contcompar(const void *, const void *);
166171
count_t subcount(count_t, count_t);
167172
int rawread(void);
168173
char rawwrite (time_t, int,
169-
struct devtstat *, struct sstat *,
174+
struct devtstat *, struct sstat *, struct bstats *,
170175
int, unsigned int, char);
171176

172177
int numeric(char *);

0 commit comments

Comments
 (0)