Skip to content

Commit 0fb299f

Browse files
edumazet authored and mehmetb0 committed
inetpeer: do not get a refcount in inet_getpeer()
BugLink: https://bugs.launchpad.net/bugs/2111953

[ Upstream commit a853c60 ]

All inet_getpeer() callers except ip4_frag_init() don't need to acquire a permanent refcount on the inetpeer.

They can switch to full RCU protection.

Move the refcount_inc_not_zero() into ip4_frag_init(), so that all the other callers no longer have to perform a pair of expensive atomic operations on a possibly contended cache line.

inet_putpeer() no longer needs to be exported.

After this patch, my DUT can receive 8,400,000 UDP packets per second targeting closed ports, using 50% less cpu cycles than before.

Also change two calls to l3mdev_master_ifindex() by l3mdev_master_ifindex_rcu() (Ido ideas)

Fixes: 8c2bd38 ("icmp: change the order of rate limits")
Signed-off-by: Eric Dumazet <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
Signed-off-by: Manuel Diewald <[email protected]>
Signed-off-by: Mehmet Basaran <[email protected]>
1 parent 17b1bbe commit 0fb299f

File tree

7 files changed

+32
-29
lines changed

7 files changed

+32
-29
lines changed

net/ipv4/icmp.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,6 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
316316
struct dst_entry *dst = &rt->dst;
317317
struct inet_peer *peer;
318318
bool rc = true;
319-
int vif;
320319

321320
if (!apply_ratelimit)
322321
return true;
@@ -325,12 +324,12 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
325324
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
326325
goto out;
327326

328-
vif = l3mdev_master_ifindex(dst->dev);
329-
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif);
327+
rcu_read_lock();
328+
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
329+
l3mdev_master_ifindex_rcu(dst->dev));
330330
rc = inet_peer_xrlim_allow(peer,
331331
READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
332-
if (peer)
333-
inet_putpeer(peer);
332+
rcu_read_unlock();
334333
out:
335334
if (!rc)
336335
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);

net/ipv4/inetpeer.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,6 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
112112
p = rb_entry(parent, struct inet_peer, rb_node);
113113
cmp = inetpeer_addr_cmp(daddr, &p->daddr);
114114
if (cmp == 0) {
115-
if (!refcount_inc_not_zero(&p->refcnt))
116-
break;
117115
now = jiffies;
118116
if (READ_ONCE(p->dtime) != now)
119117
WRITE_ONCE(p->dtime, now);
@@ -177,6 +175,7 @@ static void inet_peer_gc(struct inet_peer_base *base,
177175
}
178176
}
179177

178+
/* Must be called under RCU : No refcount change is done here. */
180179
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
181180
const struct inetpeer_addr *daddr)
182181
{
@@ -187,10 +186,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
187186
/* Attempt a lockless lookup first.
188187
* Because of a concurrent writer, we might not find an existing entry.
189188
*/
190-
rcu_read_lock();
191189
seq = read_seqbegin(&base->lock);
192190
p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
193-
rcu_read_unlock();
194191

195192
if (p)
196193
return p;
@@ -208,7 +205,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
208205
if (p) {
209206
p->daddr = *daddr;
210207
p->dtime = (__u32)jiffies;
211-
refcount_set(&p->refcnt, 2);
208+
refcount_set(&p->refcnt, 1);
212209
atomic_set(&p->rid, 0);
213210
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
214211
p->rate_tokens = 0;
@@ -236,7 +233,6 @@ void inet_putpeer(struct inet_peer *p)
236233
if (refcount_dec_and_test(&p->refcnt))
237234
call_rcu(&p->rcu, inetpeer_free_rcu);
238235
}
239-
EXPORT_SYMBOL_GPL(inet_putpeer);
240236

241237
/*
242238
* Check transmit rate limitation for given message.

net/ipv4/ip_fragment.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
8282
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
8383
{
8484
struct ipq *qp = container_of(q, struct ipq, q);
85-
struct net *net = q->fqdir->net;
86-
8785
const struct frag_v4_compare_key *key = a;
86+
struct net *net = q->fqdir->net;
87+
struct inet_peer *p = NULL;
8888

8989
q->key.v4 = *key;
9090
qp->ecn = 0;
91-
qp->peer = q->fqdir->max_dist ?
92-
inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif) :
93-
NULL;
91+
if (q->fqdir->max_dist) {
92+
rcu_read_lock();
93+
p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif);
94+
if (p && !refcount_inc_not_zero(&p->refcnt))
95+
p = NULL;
96+
rcu_read_unlock();
97+
}
98+
qp->peer = p;
9499
}
95100

96101
static void ip4_frag_free(struct inet_frag_queue *q)

net/ipv4/route.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -882,11 +882,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
882882
}
883883
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
884884
vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
885-
rcu_read_unlock();
886885

887886
net = dev_net(rt->dst.dev);
888887
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif);
889888
if (!peer) {
889+
rcu_read_unlock();
890890
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
891891
rt_nexthop(rt, ip_hdr(skb)->daddr));
892892
return;
@@ -905,7 +905,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
905905
*/
906906
if (peer->n_redirects >= ip_rt_redirect_number) {
907907
peer->rate_last = jiffies;
908-
goto out_put_peer;
908+
goto out_unlock;
909909
}
910910

911911
/* Check for load limit; set rate_last to the latest sent
@@ -926,8 +926,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
926926
&ip_hdr(skb)->saddr, inet_iif(skb),
927927
&ip_hdr(skb)->daddr, &gw);
928928
}
929-
out_put_peer:
930-
inet_putpeer(peer);
929+
out_unlock:
930+
rcu_read_unlock();
931931
}
932932

933933
static int ip_error(struct sk_buff *skb)
@@ -987,9 +987,9 @@ static int ip_error(struct sk_buff *skb)
987987
break;
988988
}
989989

990+
rcu_read_lock();
990991
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
991-
l3mdev_master_ifindex(skb->dev));
992-
992+
l3mdev_master_ifindex_rcu(skb->dev));
993993
send = true;
994994
if (peer) {
995995
now = jiffies;
@@ -1001,8 +1001,9 @@ static int ip_error(struct sk_buff *skb)
10011001
peer->rate_tokens -= ip_rt_error_cost;
10021002
else
10031003
send = false;
1004-
inet_putpeer(peer);
10051004
}
1005+
rcu_read_unlock();
1006+
10061007
if (send)
10071008
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
10081009

net/ipv6/icmp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,10 +222,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
222222
if (rt->rt6i_dst.plen < 128)
223223
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
224224

225+
rcu_read_lock();
225226
peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
226227
res = inet_peer_xrlim_allow(peer, tmo);
227-
if (peer)
228-
inet_putpeer(peer);
228+
rcu_read_unlock();
229229
}
230230
if (!res)
231231
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),

net/ipv6/ip6_output.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -616,15 +616,15 @@ int ip6_forward(struct sk_buff *skb)
616616
else
617617
target = &hdr->daddr;
618618

619+
rcu_read_lock();
619620
peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
620621

621622
/* Limit redirects both by destination (here)
622623
and by source (inside ndisc_send_redirect)
623624
*/
624625
if (inet_peer_xrlim_allow(peer, 1*HZ))
625626
ndisc_send_redirect(skb, target);
626-
if (peer)
627-
inet_putpeer(peer);
627+
rcu_read_unlock();
628628
} else {
629629
int addrtype = ipv6_addr_type(&hdr->saddr);
630630

net/ipv6/ndisc.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1717,10 +1717,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
17171717
"Redirect: destination is not a neighbour\n");
17181718
goto release;
17191719
}
1720+
1721+
rcu_read_lock();
17201722
peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
17211723
ret = inet_peer_xrlim_allow(peer, 1*HZ);
1722-
if (peer)
1723-
inet_putpeer(peer);
1724+
rcu_read_unlock();
1725+
17241726
if (!ret)
17251727
goto release;
17261728

0 commit comments

Comments
 (0)