@@ -357,18 +357,36 @@ enum bpf_link_type {
357
357
#define BPF_F_SLEEPABLE (1U << 4)
358
358
359
359
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
360
- * two extensions:
361
- *
362
- * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
363
- * insn[0].imm: map fd map fd
364
- * insn[1].imm: 0 offset into value
365
- * insn[0].off: 0 0
366
- * insn[1].off: 0 0
367
- * ldimm64 rewrite: address of map address of map[0]+offset
368
- * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
360
+ * the following extensions:
361
+ *
362
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD
363
+ * insn[0].imm: map fd
364
+ * insn[1].imm: 0
365
+ * insn[0].off: 0
366
+ * insn[1].off: 0
367
+ * ldimm64 rewrite: address of map
368
+ * verifier type: CONST_PTR_TO_MAP
369
369
*/
370
370
#define BPF_PSEUDO_MAP_FD 1
371
+ /* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
372
+ * insn[0].imm: map fd
373
+ * insn[1].imm: offset into value
374
+ * insn[0].off: 0
375
+ * insn[1].off: 0
376
+ * ldimm64 rewrite: address of map[0]+offset
377
+ * verifier type: PTR_TO_MAP_VALUE
378
+ */
371
379
#define BPF_PSEUDO_MAP_VALUE 2
380
+ /* insn[0].src_reg: BPF_PSEUDO_BTF_ID
381
+ * insn[0].imm: kernel btf id of VAR
382
+ * insn[1].imm: 0
383
+ * insn[0].off: 0
384
+ * insn[1].off: 0
385
+ * ldimm64 rewrite: address of the kernel variable
386
+ * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
387
+ * is struct/union.
388
+ */
389
+ #define BPF_PSEUDO_BTF_ID 3
372
390
373
391
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
374
392
* offset to another bpf function
@@ -415,6 +433,12 @@ enum {
415
433
416
434
/* Enable memory-mapping BPF map */
417
435
BPF_F_MMAPABLE = (1U << 10 ),
436
+
437
+ /* Share perf_event among processes */
438
+ BPF_F_PRESERVE_ELEMS = (1U << 11 ),
439
+
440
+ /* Create a map that is suitable to be an inner map with dynamic max entries */
441
+ BPF_F_INNER_MAP = (1U << 12 ),
418
442
};
419
443
420
444
/* Flags for BPF_PROG_QUERY. */
@@ -1678,7 +1702,7 @@ union bpf_attr {
1678
1702
* **TCP_CONGESTION**, **TCP_BPF_IW**,
1679
1703
* **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
1680
1704
* **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
1681
- * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
1705
+ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
1682
1706
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
1683
1707
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
1684
1708
* Return
@@ -2233,7 +2257,7 @@ union bpf_attr {
2233
2257
* Description
2234
2258
* This helper is used in programs implementing policies at the
2235
2259
* skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
2236
- * if the verdeict eBPF program returns **SK_PASS**), redirect it
2260
+ * if the verdict eBPF program returns **SK_PASS**), redirect it
2237
2261
* to the socket referenced by *map* (of type
2238
2262
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
2239
2263
* egress interfaces can be used for redirection. The
@@ -3654,15 +3678,68 @@ union bpf_attr {
3654
3678
* Return
3655
3679
* The id is returned or 0 in case the id could not be retrieved.
3656
3680
*
3657
- * long bpf_redirect_neigh(u32 ifindex, u64 flags)
3681
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
3658
3682
* Description
3659
3683
* Redirect the packet to another net device of index *ifindex*
3660
3684
* and fill in L2 addresses from neighboring subsystem. This helper
3661
3685
* is somewhat similar to **bpf_redirect**\ (), except that it
3662
- * fills in e.g. MAC addresses based on the L3 information from
3663
- * the packet. This helper is supported for IPv4 and IPv6 protocols.
3686
+ * populates L2 addresses as well, meaning, internally, the helper
3687
+ * relies on the neighbor lookup for the L2 address of the nexthop.
3688
+ *
3689
+ * The helper will perform a FIB lookup based on the skb's
3690
+ * networking header to get the address of the next hop, unless
3691
+ * this is supplied by the caller in the *params* argument. The
3692
+ * *plen* argument indicates the length of *params* and should be set
3693
+ * to 0 if *params* is NULL.
3694
+ *
3695
+ * The *flags* argument is reserved and must be 0. The helper is
3696
+ * currently only supported for tc BPF program types, and enabled
3697
+ * for IPv4 and IPv6 protocols.
3698
+ * Return
3699
+ * The helper returns **TC_ACT_REDIRECT** on success or
3700
+ * **TC_ACT_SHOT** on error.
3701
+ *
3702
+ * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu)
3703
+ * Description
3704
+ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a
3705
+ * pointer to the percpu kernel variable on *cpu*. A ksym is an
3706
+ * extern variable decorated with '__ksym'. For ksym, there is a
3707
+ * global var (either static or global) defined with the same name
3708
+ * in the kernel. The ksym is percpu if the global var is percpu.
3709
+ * The returned pointer points to the global percpu var on *cpu*.
3710
+ *
3711
+ * bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in the
3712
+ * kernel, except that bpf_per_cpu_ptr() may return NULL. This
3713
+ * happens if *cpu* is larger than nr_cpu_ids. The caller of
3714
+ * bpf_per_cpu_ptr() must check the returned value.
3715
+ * Return
3716
+ * A pointer pointing to the kernel percpu variable on *cpu*, or
3717
+ * NULL, if *cpu* is invalid.
3718
+ *
3719
+ * void *bpf_this_cpu_ptr(const void *percpu_ptr)
3720
+ * Description
3721
+ * Take a pointer to a percpu ksym, *percpu_ptr*, and return a
3722
+ * pointer to the percpu kernel variable on this cpu. See the
3723
+ * description of 'ksym' in **bpf_per_cpu_ptr**\ ().
3724
+ *
3725
+ * bpf_this_cpu_ptr() has the same semantics as this_cpu_ptr() in
3726
+ * the kernel. Unlike **bpf_per_cpu_ptr**\ (), it would
3727
+ * never return NULL.
3728
+ * Return
3729
+ * A pointer pointing to the kernel percpu variable on this cpu.
3730
+ *
3731
+ * long bpf_redirect_peer(u32 ifindex, u64 flags)
3732
+ * Description
3733
+ * Redirect the packet to another net device of index *ifindex*.
3734
+ * This helper is somewhat similar to **bpf_redirect**\ (), except
3735
+ * that the redirection happens to the *ifindex*'s peer device and
3736
+ * the netns switch takes place from ingress to ingress without
3737
+ * going through the CPU's backlog queue.
3738
+ *
3664
3739
* The *flags* argument is reserved and must be 0. The helper is
3665
- * currently only supported for tc BPF program types.
3740
+ * currently only supported for tc BPF program types at the ingress
3741
+ * hook and for veth device types. The peer device must reside in a
3742
+ * different network namespace.
3666
3743
* Return
3667
3744
* The helper returns **TC_ACT_REDIRECT** on success or
3668
3745
* **TC_ACT_SHOT** on error.
@@ -3821,6 +3898,9 @@ union bpf_attr {
3821
3898
FN(seq_printf_btf), \
3822
3899
FN(skb_cgroup_classid), \
3823
3900
FN(redirect_neigh), \
3901
+ FN(bpf_per_cpu_ptr), \
3902
+ FN(bpf_this_cpu_ptr), \
3903
+ FN(redirect_peer), \
3824
3904
/* */
3825
3905
3826
3906
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4831,6 +4911,16 @@ struct bpf_fib_lookup {
4831
4911
__u8 dmac [6 ]; /* ETH_ALEN */
4832
4912
};
4833
4913
4914
+ struct bpf_redir_neigh {
4915
+ /* network family for lookup (AF_INET, AF_INET6) */
4916
+ __u32 nh_family ;
4917
+ /* network address of nexthop; skips fib lookup to find gateway */
4918
+ union {
4919
+ __be32 ipv4_nh ;
4920
+ __u32 ipv6_nh [4 ]; /* in6_addr; network order */
4921
+ };
4922
+ };
4923
+
4834
4924
enum bpf_task_fd_type {
4835
4925
BPF_FD_TYPE_RAW_TRACEPOINT , /* tp name */
4836
4926
BPF_FD_TYPE_TRACEPOINT , /* tp name */
0 commit comments