Skip to content

Commit 513feff

Browse files
committed
bpf: udp: Avoid socket skips and repeats during iteration
Replace the offset-based approach for tracking progress through a bucket in the UDP table with one based on socket cookies. Remember the cookies of unprocessed sockets from the last batch and use this list to pick up where we left off or, in the case that the next socket disappears between reads, find the first socket after that point that still exists in the bucket and resume from there.

This approach guarantees that all sockets that existed when iteration began and continue to exist throughout will be visited exactly once. Sockets that are added to the table during iteration may or may not be seen, but if they are, they will be seen exactly once.

Initialize iter->st_bucket_done to true and iter->state.bucket to -1 to ensure that on the first call to bpf_iter_udp_batch(), the resume_bucket case is not hit. It's not strictly accurate that we are resuming from bucket zero when we create the first batch, and this avoids adding special case logic for just that bucket.

Signed-off-by: Jordan Rife <[email protected]>
1 parent 9e2f993 commit 513feff

File tree

1 file changed

+50
-17
lines changed

1 file changed

+50
-17
lines changed

net/ipv4/udp.c

+50-17
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
#include <linux/inet.h>
9494
#include <linux/netdevice.h>
9595
#include <linux/slab.h>
96+
#include <linux/sock_diag.h>
9697
#include <net/tcp_states.h>
9798
#include <linux/skbuff.h>
9899
#include <linux/proc_fs.h>
@@ -3392,34 +3393,51 @@ struct bpf_iter__udp {
33923393

33933394
/* One slot of the iterator batch.
 *
 * The two members share storage. While a batch is being filled, a slot
 * holds a socket pointer (batch[end_sk++].sock = sk). When unprocessed
 * slots are released in bpf_iter_udp_put_batch(), the pointer is replaced
 * by that socket's cookie so the next batch can find its resume point.
 */
union bpf_udp_iter_batch_item {
33943395
struct sock *sock; /* referenced socket while the batch is live */
3396+
__u64 cookie; /* cookie saved after the reference is dropped */
33953397
};
33963398

33973399
/* Private state of the BPF UDP socket iterator.
 *
 * This diff removes the 'offset' member (the '-' line below): progress
 * inside a bucket is no longer tracked as a count of sockets already
 * visited but through the cookies left in batch[cur_sk..end_sk).
 */
struct bpf_udp_iter_state {
33983400
struct udp_iter_state state; /* state.bucket is the bucket being iterated */
33993401
unsigned int cur_sk; /* index in batch[] of the next entry to hand out / release */
34003402
unsigned int end_sk; /* one past the last filled entry in batch[] */
34013403
unsigned int max_sk; /* bound checked before adding to batch[] (end_sk < max_sk) */
3402-
int offset;
34033404
union bpf_udp_iter_batch_item *batch; /* socket pointers, or saved cookies after put_batch */
34043405
bool st_bucket_done; /* when true, bpf_iter_udp_batch() advances to the next bucket */
34053406
};
34063407

34073408
static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
34083409
unsigned int new_batch_sz, int flags);
3410+
/* Pick the socket at which iteration of a bucket should resume.
 *
 * @first_sk:  socket every scan starts from; the caller passes the first
 *             entry of the bucket's list.
 * @cookies:   cookies of the sockets left unprocessed by the previous
 *             batch, in the order they were batched.
 * @n_cookies: number of entries in @cookies.
 *
 * The cookies are tried in order. For each one the list is walked again
 * from @first_sk, and the first socket whose sk_cookie equals that cookie
 * is returned. An earlier cookie therefore always wins over a later one,
 * and a later cookie is only considered when the earlier socket is no
 * longer found in the list.
 *
 * Returns the matching socket. With n_cookies == 0 the loop never runs and
 * NULL is returned. When no cookie matches, the value left in 'sk' by the
 * last walk is returned - presumably NULL once
 * udp_portaddr_for_each_entry_from() runs off the end of the list; confirm
 * against the macro definition.
 */
static struct sock *bpf_iter_udp_resume(struct sock *first_sk,
3411+
union bpf_udp_iter_batch_item *cookies,
3412+
int n_cookies)
3413+
{
3414+
struct sock *sk = NULL;
3415+
int i = 0;
3416+
3417+
for (; i < n_cookies; i++) {
3418+
/* Restart the walk from the head for every cookie. */
sk = first_sk;
3419+
udp_portaddr_for_each_entry_from(sk)
3420+
if (cookies[i].cookie == atomic64_read(&sk->sk_cookie))
3421+
goto done;
3422+
}
3423+
done:
3424+
return sk;
3425+
}
3426+
34093427
static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
34103428
{
34113429
struct bpf_udp_iter_state *iter = seq->private;
34123430
struct udp_iter_state *state = &iter->state;
3431+
unsigned int find_cookie, end_cookie = 0;
34133432
struct net *net = seq_file_net(seq);
3414-
int resume_bucket, resume_offset;
34153433
struct udp_table *udptable;
34163434
unsigned int batch_sks = 0;
3435+
int resume_bucket;
34173436
int resizes = 0;
34183437
struct sock *sk;
34193438
int err = 0;
34203439

34213440
resume_bucket = state->bucket;
3422-
resume_offset = iter->offset;
34233441

34243442
/* The current batch is done, so advance the bucket. */
34253443
if (iter->st_bucket_done)
@@ -3435,6 +3453,8 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
34353453
* before releasing the bucket lock. This allows BPF programs that are
34363454
* called in seq_show to acquire the bucket lock if needed.
34373455
*/
3456+
find_cookie = iter->cur_sk;
3457+
end_cookie = iter->end_sk;
34383458
iter->cur_sk = 0;
34393459
iter->end_sk = 0;
34403460
iter->st_bucket_done = false;
@@ -3446,21 +3466,22 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
34463466
if (hlist_empty(&hslot2->head))
34473467
goto next_bucket;
34483468

3449-
iter->offset = 0;
34503469
spin_lock_bh(&hslot2->lock);
34513470
sk = hlist_entry_safe(hslot2->head.first, struct sock,
34523471
__sk_common.skc_portaddr_node);
3472+
/* Resume from the first (in iteration order) unseen socket from
3473+
* the last batch that still exists in resume_bucket. Most of
3474+
* the time this will just be where the last iteration left off
3475+
* in resume_bucket unless that socket disappeared between
3476+
* reads.
3477+
*/
3478+
if (state->bucket == resume_bucket)
3479+
sk = bpf_iter_udp_resume(sk,
3480+
&iter->batch[find_cookie],
3481+
end_cookie - find_cookie);
34533482
fill_batch:
34543483
udp_portaddr_for_each_entry_from(sk) {
34553484
if (seq_sk_match(seq, sk)) {
3456-
/* Resume from the last iterated socket at the
3457-
* offset in the bucket before iterator was stopped.
3458-
*/
3459-
if (state->bucket == resume_bucket &&
3460-
iter->offset < resume_offset) {
3461-
++iter->offset;
3462-
continue;
3463-
}
34643485
if (iter->end_sk < iter->max_sk) {
34653486
sock_hold(sk);
34663487
iter->batch[iter->end_sk++].sock = sk;
@@ -3528,10 +3549,8 @@ static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
35283549
/* Whenever seq_next() is called, the iter->cur_sk is
35293550
* done with seq_show(), so unref the iter->cur_sk.
35303551
*/
3531-
if (iter->cur_sk < iter->end_sk) {
3552+
if (iter->cur_sk < iter->end_sk)
35323553
sock_put(iter->batch[iter->cur_sk++].sock);
3533-
++iter->offset;
3534-
}
35353554

35363555
/* After updating iter->cur_sk, check if there are more sockets
35373556
* available in the current bucket batch.
@@ -3601,8 +3620,19 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
36013620

36023621
/* Release the socket references still held by batch[cur_sk..end_sk).
 *
 * Old body ('-' lines): dropped each reference and advanced iter->cur_sk
 * up to iter->end_sk.
 *
 * New body ('+' lines): walks the same range with a local index, so
 * iter->cur_sk and iter->end_sk are left untouched, and converts every
 * slot from a socket pointer into that socket's cookie. The unchanged
 * cur_sk/end_sk pair is what bpf_iter_udp_batch() later reads as
 * find_cookie/end_cookie to locate the saved cookies.
 */
static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter)
36033622
{
3604-
while (iter->cur_sk < iter->end_sk)
3605-
sock_put(iter->batch[iter->cur_sk++].sock);
3623+
union bpf_udp_iter_batch_item *item;
3624+
unsigned int cur_sk = iter->cur_sk;
3625+
__u64 cookie;
3626+
3627+
/* Remember the cookies of the sockets we haven't seen yet, so we can
3628+
* pick up where we left off next time around.
3629+
*/
3630+
while (cur_sk < iter->end_sk) {
3631+
item = &iter->batch[cur_sk++];
3632+
/* Read the cookie while the reference is still held. */
cookie = sock_gen_cookie(item->sock);
3633+
sock_put(item->sock);
3634+
/* sock and cookie share storage: this overwrites the pointer. */
item->cookie = cookie;
3635+
}
36063636
}
36073637

36083638
static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
@@ -3898,6 +3928,9 @@ static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
38983928
if (ret)
38993929
bpf_iter_fini_seq_net(priv_data);
39003930

3931+
iter->st_bucket_done = true;
3932+
iter->state.bucket = -1;
3933+
39013934
return ret;
39023935
}
39033936

0 commit comments

Comments
 (0)