Skip to content

Commit 5152eb7

Browse files
committed
Add support to monitor bpf programs.
BPF is a very important component of modern Linux systems, and it is gaining more features and adoption. This commit enables atop to monitor BPF programs. The output looks like: ATOP - kerneltest002 2020/06/16 17:01:12 -------------- 10s elapsed PRC | sys 2.72s | user 4.85s | #proc 761 | #zombie 0 | #exit 250 | CPU | sys 29% | user 50% | irq 0% | idle 7915% | wait 8% | CPL | avg1 1.68 | avg5 1.05 | avg15 0.72 | csw 160979 | intr 66341 | [...] BPF_PROG_ID NAME TOTAL_TIME_NS RUN_CNT CPU AVG_TIME_NS 894 tracepoint__sch 83882 11 0% 7625.64 893 tracepoint__sch 43231 5 0% 8646.20 892 tracepoint__tas 34818 4 0% 8704.50 PID SYSCPU USRCPU VGROW RGROW RDDSK WRDSK EXC THR S CPUNR CPU CMD 1/113 2669644 0.45s 1.08s 603.1M 23100K 0K 0K - 10 S 59 15% squashfuse_ll To build atop with BPF monitoring, pass the following option to make: ATOP_BPF_SUPPORT=1 make -j Atop periodically enables monitoring of BPF programs by calling: bpf_enable_stats(BPF_STATS_RUN_TIME); Since monitoring adds non-trivial overhead to the BPF programs, the following options are added so that BPF programs can be monitored less often: bpfsamplerate, default 1 bpfsampleinterval, default 1 BPF stats are enabled for bpfsampleinterval seconds every bpfsamplerate atop intervals. bpfsampleinterval must be smaller than the atop interval. Changes v1 => v2: 1. Instead of using an unsafe sysctl, use a safe new API to enable BPF runtime stats. 2. Change output columns: remove "TYPE", add "CPU" for CPU %.
1 parent ef62300 commit 5152eb7

12 files changed

+659
-155
lines changed

Makefile

+9-3
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,18 @@ OBJMOD3 = showgeneric.o showlinux.o showsys.o showprocs.o
2828
OBJMOD4 = atopsar.o netatopif.o gpucom.o
2929
ALLMODS = $(OBJMOD0) $(OBJMOD1) $(OBJMOD2) $(OBJMOD3) $(OBJMOD4)
3030

31+
ifneq ($(ATOP_BPF_SUPPORT),)
32+
ALLMODS += photobpf.o
33+
ATOP_BPF_LDFLAGS = -lbpf
34+
CFLAGS += -DATOP_BPF_SUPPORT
35+
endif
36+
3137
VERS = $(shell ./atop -V 2>/dev/null| sed -e 's/^[^ ]* //' -e 's/ .*//')
3238

3339
all: atop atopsar atopacctd atopconvert atopcat
3440

3541
atop: atop.o $(ALLMODS) Makefile
36-
$(CC) atop.o $(ALLMODS) -o atop -lncursesw -lz -lm -lrt $(LDFLAGS)
42+
$(CC) atop.o $(ALLMODS) -o atop -lncursesw -lz -lm -lrt $(ATOP_BPF_LDFLAGS) $(LDFLAGS)
3743

3844
atopsar: atop
3945
ln -sf atop atopsar
@@ -187,7 +193,7 @@ versdate.h:
187193
./mkdate
188194

189195
atop.o: atop.h photoproc.h photosyst.h acctproc.h showgeneric.h
190-
atopsar.o: atop.h photoproc.h photosyst.h
196+
atopsar.o: atop.h photoproc.h photosyst.h
191197
rawlog.o: atop.h photoproc.h photosyst.h rawlog.h showgeneric.h
192198
various.o: atop.h acctproc.h
193199
ifprop.o: atop.h photosyst.h ifprop.h
@@ -200,7 +206,7 @@ photoproc.o: atop.h photoproc.h
200206
photosyst.o: atop.h photosyst.h
201207
showgeneric.o: atop.h photoproc.h photosyst.h showgeneric.h showlinux.h
202208
showlinux.o: atop.h photoproc.h photosyst.h showgeneric.h showlinux.h
203-
showsys.o: atop.h photoproc.h photosyst.h showgeneric.h
209+
showsys.o: atop.h photoproc.h photosyst.h showgeneric.h
204210
showprocs.o: atop.h photoproc.h photosyst.h showgeneric.h showlinux.h
205211
version.o: version.c version.h versdate.h
206212
gpucom.o: atop.h photoproc.h photosyst.h

atop.c

+68-26
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
/*
22
** ATOP - System & Process Monitor
33
**
4-
** The program 'atop' offers the possibility to view the activity of
4+
** The program 'atop' offers the possibility to view the activity of
55
** the system on system-level as well as process-level.
66
**
77
** This source-file contains the main-function, which verifies the
8-
** calling-parameters and takes care of initialization.
8+
** calling-parameters and takes care of initialization.
99
** The engine-function drives the main sample-loop in which after the
1010
** indicated interval-time a snapshot is taken of the system-level and
1111
** process-level counters and the deviations are calculated and
@@ -35,7 +35,7 @@
3535
** --------------------------------------------------------------------------
3636
**
3737
** After initialization, the main-function calls the ENGINE.
38-
** For every cycle (so after another interval) the ENGINE calls various
38+
** For every cycle (so after another interval) the ENGINE calls various
3939
** functions as shown below:
4040
**
4141
** +---------------------------------------------------------------------+
@@ -48,15 +48,15 @@
4848
** | | ^ | ^ | ^ | ^ | | |
4949
** +---|-----|--------|-----|--------|----|--------|----|--------|----|--+
5050
** | | | | | | | | | |
51-
** +--V-----|--+ +--V-----|--+ +--V----|--+ +--V----|--+ +--V----|-+
51+
** +--V-----|--+ +--V-----|--+ +--V----|--+ +--V----|--+ +--V----|-+
5252
** | | | | | | | | | |
5353
** | photosyst | | photoproc | | acct | | deviate | | print |
5454
** | | | | |photoproc | | ...syst | | |
5555
** | | | | | | | ...proc | | |
56-
** +-----------+ +-----------+ +----------+ +----------+ +---------+
56+
** +-----------+ +-----------+ +----------+ +----------+ +---------+
5757
** ^ ^ ^ ^ |
5858
** | | | | |
59-
** | | | V V
59+
** | | | V V
6060
** ______ _________ __________ ________ _________
6161
** / \ / \ / \ / \ / \
6262
** /proc /proc accounting task screen or
@@ -84,8 +84,8 @@
8484
** When all counters have been gathered, functions are called to calculate
8585
** the difference between the current counter-values and the counter-values
8686
** of the previous cycle. These functions operate on the system-level
87-
** as well as on the task-level counters.
88-
** These differences are stored in a new structure(-table).
87+
** as well as on the task-level counters.
88+
** These differences are stored in a new structure(-table).
8989
**
9090
** - deviatsyst()
9191
** Calculates the differences between the current system-level
@@ -98,7 +98,7 @@
9898
** task-database; this "database" is implemented as a linked list
9999
** of taskinfo structures in memory (so no disk-accesses needed).
100100
** Within this linked list hash-buckets are maintained for fast searches.
101-
** The entire task-database is handled via a set of well-defined
101+
** The entire task-database is handled via a set of well-defined
102102
** functions from which the name starts with "pdb_..." (see the
103103
** source-file procdbase.c).
104104
** The processes which have been finished during the last cycle
@@ -112,7 +112,7 @@
112112
** these addresses can be modified in the main-function depending on particular
113113
** flags. In this way various representation-layers (ASCII, graphical, ...)
114114
** can be linked with 'atop'; the one to use can eventually be chosen
115-
** at runtime.
115+
** at runtime.
116116
**
117117
** $Log: atop.c,v $
118118
** Revision 1.49 2010/10/23 14:01:00 gerlof
@@ -296,6 +296,7 @@
296296
#include "showgeneric.h"
297297
#include "parseable.h"
298298
#include "gpucom.h"
299+
#include "photobpf.h"
299300

300301
#define allflags "ab:cde:fghijklmnopqrstuvwxyz1ABCDEFGHIJKL:MNOP:QRSTUVWXYZ"
301302
#define MAXFL 64 /* maximum number of command-line flags */
@@ -321,6 +322,16 @@ char usecolors = 1; /* boolean: colors for high occupation */
321322
char threadview = 0; /* boolean: show individual threads */
322323
char calcpss = 0; /* boolean: read/calculate process PSS */
323324

325+
/*
326+
** arguments for bpf stats sampling
327+
** We enable bpf stats for bpfsampleinterval seconds every bpfsamplerate
328+
** atop intervals. bpfsampleinterval must be smaller than the atop interval.
329+
**
330+
** If bpfsamplerate == 0, disable sampling of bpf stats.
331+
*/
332+
unsigned int bpfsamplerate = 1;
333+
unsigned int bpfsampleinterval = 1;
334+
324335
unsigned short hertz;
325336
unsigned int pagesize;
326337
unsigned int nrgpus;
@@ -390,6 +401,9 @@ void do_almostcrit(char *, char *);
390401
void do_atopsarflags(char *, char *);
391402
void do_pacctdir(char *, char *);
392403
void do_perfevents(char *, char *);
404+
void do_bpflines(char *, char *);
405+
void do_bpfsamplerate(char *, char *);
406+
void do_bpfsampleinterval(char *, char *);
393407

394408
static struct {
395409
char *tag;
@@ -439,6 +453,9 @@ static struct {
439453
{ "atopsarflags", do_atopsarflags, 0, },
440454
{ "perfevents", do_perfevents, 0, },
441455
{ "pacctdir", do_pacctdir, 1, },
456+
{ "bpflines", do_bpflines, 0, },
457+
{ "bpfsamplerate", do_bpfsamplerate, 0, },
458+
{ "bpfsampleinterval", do_bpfsampleinterval, 0, },
442459
};
443460

444461
/*
@@ -465,6 +482,8 @@ main(int argc, char *argv[])
465482
exit(42);
466483
}
467484

485+
photo_bpf_check();
486+
468487
/*
469488
** preserve command arguments to allow restart of other version
470489
*/
@@ -496,12 +515,12 @@ main(int argc, char *argv[])
496515
if ( memcmp(p, "atopsar", 7) == 0)
497516
return atopsar(argc, argv);
498517

499-
/*
500-
** interpret command-line arguments & flags
518+
/*
519+
** interpret command-line arguments & flags
501520
*/
502521
if (argc > 1)
503522
{
504-
/*
523+
/*
505524
** gather all flags for visualization-functions
506525
**
507526
** generic flags will be handled here;
@@ -595,17 +614,17 @@ main(int argc, char *argv[])
595614
}
596615

597616
/*
598-
** get optional interval-value and optional number of samples
617+
** get optional interval-value and optional number of samples
599618
*/
600619
if (optind < argc && optind < MAXFL)
601620
{
602621
if (!numeric(argv[optind]))
603622
prusage(argv[0]);
604-
623+
605624
interval = atoi(argv[optind]);
606-
625+
607626
optind++;
608-
627+
609628
if (optind < argc)
610629
{
611630
if (!numeric(argv[optind]) )
@@ -761,6 +780,7 @@ engine(void)
761780
gpupending=0; /* boolean: request sent */
762781

763782
struct gpupidstat *gp = NULL;
783+
struct bstats *bstats = NULL;
764784

765785
/*
766786
** initialization: allocate required memory dynamically
@@ -812,6 +832,8 @@ engine(void)
812832
if (nrgpus)
813833
supportflags |= GPUSTAT;
814834

835+
if (system_support_bpf())
836+
supportflags |= BPFSTAT;
815837
/*
816838
** MAIN-LOOP:
817839
** - Wait for the requested number of seconds or for other trigger
@@ -833,11 +855,15 @@ engine(void)
833855
/*
834856
** if the limit-flag is specified:
835857
** check if the next sample is expected before midnight;
836-
** if not, stop atop now
858+
** if not, stop atop now
837859
*/
838860
if (midnightflag && (curtime+interval) > timelimit)
839861
break;
840862

863+
if ((supportflags & BPFSTAT) &&
864+
bpfsamplerate && sampcnt % bpfsamplerate == 0)
865+
bstats = get_devbstats();
866+
841867
/*
842868
** wait for alarm-signal to arrive (except first sample)
843869
** or wait for SIGUSR1/SIGUSR2
@@ -854,13 +880,13 @@ engine(void)
854880
curtime = time(0); /* seconds since 1-1-1970 */
855881

856882
/*
857-
** send request for statistics to atopgpud
883+
** send request for statistics to atopgpud
858884
*/
859885
if (nrgpus)
860886
gpupending = gpud_statrequest();
861887

862888
/*
863-
** take a snapshot of the current system-level statistics
889+
** take a snapshot of the current system-level statistics
864890
** and calculate the deviations (i.e. calculate the activity
865891
** during the last sample)
866892
*/
@@ -913,7 +939,7 @@ engine(void)
913939
curtime-pretime > 0 ? curtime-pretime : 1);
914940

915941
/*
916-
** take a snapshot of the current task-level statistics
942+
** take a snapshot of the current task-level statistics
917943
** and calculate the deviations (i.e. calculate the activity
918944
** during the last sample)
919945
**
@@ -1008,10 +1034,14 @@ engine(void)
10081034
** the deviations
10091035
*/
10101036
lastcmd = (vis.show_samp)( curtime,
1011-
curtime-pretime > 0 ? curtime-pretime : 1,
1012-
&devtstat, devsstat,
1013-
nprocexit, noverflow, sampcnt==0);
1037+
curtime-pretime > 0 ? curtime-pretime : 1,
1038+
&devtstat, devsstat, bstats,
1039+
nprocexit, noverflow, sampcnt==0);
10141040

1041+
if (bstats) {
1042+
free(bstats->bpfall);
1043+
bstats = NULL;
1044+
}
10151045
/*
10161046
** release dynamically allocated memory
10171047
*/
@@ -1060,7 +1090,7 @@ prusage(char *myname)
10601090
printf("\t -%c show version information\n", MVERSION);
10611091
printf("\t -%c show or log all processes (i.s.o. active processes "
10621092
"only)\n", MALLPROC);
1063-
printf("\t -%c calculate proportional set size (PSS) per process\n",
1093+
printf("\t -%c calculate proportional set size (PSS) per process\n",
10641094
MCALCPSS);
10651095
printf("\t -P generate parseable output for specified label(s)\n");
10661096
printf("\t -L alternate line length (default 80) in case of "
@@ -1140,6 +1170,18 @@ do_linelength(char *name, char *val)
11401170
linelen = get_posval(name, val);
11411171
}
11421172

1173+
void
1174+
do_bpfsamplerate(char *name, char *val)
1175+
{
1176+
bpfsamplerate = get_posval(name, val);
1177+
}
1178+
1179+
void
1180+
do_bpfsampleinterval(char *name, char *val)
1181+
{
1182+
bpfsampleinterval = get_posval(name, val);
1183+
}
1184+
11431185
/*
11441186
** read RC-file and modify defaults accordingly
11451187
*/
@@ -1190,7 +1232,7 @@ readrc(char *path, int syslevel)
11901232
default:
11911233
if (tagname[0] == '#')
11921234
continue;
1193-
1235+
11941236
if (tagvalue[0] != '#')
11951237
break;
11961238

atop.h

+10-5
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ struct tstat;
4242
struct devtstat;
4343
struct sstat;
4444
struct netpertask;
45+
struct bstats;
4546

46-
/*
47+
/*
4748
** miscellaneous flags
4849
*/
4950
#define RRBOOT 0x0001
@@ -57,7 +58,7 @@ struct netpertask;
5758

5859
struct visualize {
5960
char (*show_samp) (time_t, int,
60-
struct devtstat *, struct sstat *,
61+
struct devtstat *, struct sstat *, struct bstats *,
6162
int, unsigned int, char);
6263
void (*show_error) (const char *, ...);
6364
void (*show_end) (void);
@@ -104,6 +105,9 @@ extern int netbadness;
104105
extern int pagbadness;
105106
extern int almostcrit;
106107

108+
extern int bpflines;
109+
extern unsigned int bpfsampleinterval;
110+
107111
/*
108112
** bit-values for supportflags
109113
*/
@@ -113,9 +117,10 @@ extern int almostcrit;
113117
#define NETATOPD 0x00000020
114118
#define DOCKSTAT 0x00000040
115119
#define GPUSTAT 0x00000080
120+
#define BPFSTAT 0x00000100
116121

117122
/*
118-
** in rawlog file, the four least significant bits
123+
** in rawlog file, the four least significant bits
119124
** are moved to the per-sample flags and therefore dummy
120125
** in the support flags of the general header
121126
*/
@@ -125,7 +130,7 @@ extern int almostcrit;
125130
** structure containing the start-addresses of functions for visualization
126131
*/
127132
char generic_samp (time_t, int,
128-
struct devtstat *, struct sstat *,
133+
struct devtstat *, struct sstat *, struct bstats *,
129134
int, unsigned int, char);
130135
void generic_error(const char *, ...);
131136
void generic_end (void);
@@ -167,7 +172,7 @@ int contcompar(const void *, const void *);
167172
count_t subcount(count_t, count_t);
168173
int rawread(void);
169174
char rawwrite (time_t, int,
170-
struct devtstat *, struct sstat *,
175+
struct devtstat *, struct sstat *, struct bstats *,
171176
int, unsigned int, char);
172177

173178
int numeric(char *);

0 commit comments

Comments
 (0)