Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

+15

Documentation/netlink/specs/rt_link.yaml

··· 920 920 - name: l2 921 921 - name: l3 922 922 923 + - 924 + name: netkit-scrub 925 + type: enum 926 + entries: 927 + - name: none 928 + - name: default 929 + 923 930 attribute-sets: 924 931 - 925 932 name: link-attrs ··· 2158 2151 name: mode 2159 2152 type: u32 2160 2153 enum: netkit-mode 2154 + - 2155 + name: scrub 2156 + type: u32 2157 + enum: netkit-scrub 2158 + - 2159 + name: peer-scrub 2160 + type: u32 2161 + enum: netkit-scrub 2161 2162 2162 2163 sub-messages: 2163 2164 -

+1 -1

MAINTAINERS

··· 16300 16300 F: include/trace/events/mptcp.h 16301 16301 F: include/uapi/linux/mptcp*.h 16302 16302 F: net/mptcp/ 16303 - F: tools/testing/selftests/bpf/*/*mptcp*.c 16303 + F: tools/testing/selftests/bpf/*/*mptcp*.[ch] 16304 16304 F: tools/testing/selftests/net/mptcp/ 16305 16305 16306 16306 NETWORKING [TCP]

+57 -34

drivers/net/netkit.c

··· 20 20 struct net_device __rcu *peer; 21 21 struct bpf_mprog_entry __rcu *active; 22 22 enum netkit_action policy; 23 + enum netkit_scrub scrub; 23 24 struct bpf_mprog_bundle bundle; 24 25 25 26 /* Needed in slow-path */ ··· 51 50 return ret; 52 51 } 53 52 54 - static void netkit_prep_forward(struct sk_buff *skb, bool xnet) 53 + static void netkit_xnet(struct sk_buff *skb) 55 54 { 56 - skb_scrub_packet(skb, xnet); 57 55 skb->priority = 0; 56 + skb->mark = 0; 57 + } 58 + 59 + static void netkit_prep_forward(struct sk_buff *skb, 60 + bool xnet, bool xnet_scrub) 61 + { 62 + skb_scrub_packet(skb, false); 58 63 nf_skip_egress(skb, true); 59 64 skb_reset_mac_header(skb); 65 + if (!xnet) 66 + return; 67 + ipvs_reset(skb); 68 + skb_clear_tstamp(skb); 69 + if (xnet_scrub) 70 + netkit_xnet(skb); 60 71 } 61 72 62 73 static struct netkit *netkit_priv(const struct net_device *dev) ··· 93 80 !pskb_may_pull(skb, ETH_HLEN) || 94 81 skb_orphan_frags(skb, GFP_ATOMIC))) 95 82 goto drop; 96 - netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer))); 83 + netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)), 84 + nk->scrub); 97 85 eth_skb_pkt_type(skb, peer); 98 86 skb->dev = peer; 99 87 entry = rcu_dereference(nk->active); ··· 311 297 } 312 298 } 313 299 314 - static int netkit_check_mode(int mode, struct nlattr *tb, 315 - struct netlink_ext_ack *extack) 316 - { 317 - switch (mode) { 318 - case NETKIT_L2: 319 - case NETKIT_L3: 320 - return 0; 321 - default: 322 - NL_SET_ERR_MSG_ATTR(extack, tb, 323 - "Provided device mode can only be L2 or L3"); 324 - return -EINVAL; 325 - } 326 - } 327 - 328 300 static int netkit_validate(struct nlattr *tb[], struct nlattr *data[], 329 301 struct netlink_ext_ack *extack) 330 302 { ··· 332 332 struct netlink_ext_ack *extack) 333 333 { 334 334 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr; 335 - enum netkit_action default_prim = NETKIT_PASS; 336 - enum netkit_action default_peer = NETKIT_PASS; 335 + enum netkit_action 
policy_prim = NETKIT_PASS; 336 + enum netkit_action policy_peer = NETKIT_PASS; 337 + enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT; 338 + enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT; 337 339 enum netkit_mode mode = NETKIT_L3; 338 340 unsigned char ifname_assign_type; 339 341 struct ifinfomsg *ifmp = NULL; ··· 346 344 int err; 347 345 348 346 if (data) { 349 - if (data[IFLA_NETKIT_MODE]) { 350 - attr = data[IFLA_NETKIT_MODE]; 351 - mode = nla_get_u32(attr); 352 - err = netkit_check_mode(mode, attr, extack); 353 - if (err < 0) 354 - return err; 355 - } 347 + if (data[IFLA_NETKIT_MODE]) 348 + mode = nla_get_u32(data[IFLA_NETKIT_MODE]); 356 349 if (data[IFLA_NETKIT_PEER_INFO]) { 357 350 attr = data[IFLA_NETKIT_PEER_INFO]; 358 351 ifmp = nla_data(attr); ··· 359 362 return err; 360 363 tbp = peer_tb; 361 364 } 365 + if (data[IFLA_NETKIT_SCRUB]) 366 + scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]); 367 + if (data[IFLA_NETKIT_PEER_SCRUB]) 368 + scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]); 362 369 if (data[IFLA_NETKIT_POLICY]) { 363 370 attr = data[IFLA_NETKIT_POLICY]; 364 - default_prim = nla_get_u32(attr); 365 - err = netkit_check_policy(default_prim, attr, extack); 371 + policy_prim = nla_get_u32(attr); 372 + err = netkit_check_policy(policy_prim, attr, extack); 366 373 if (err < 0) 367 374 return err; 368 375 } 369 376 if (data[IFLA_NETKIT_PEER_POLICY]) { 370 377 attr = data[IFLA_NETKIT_PEER_POLICY]; 371 - default_peer = nla_get_u32(attr); 372 - err = netkit_check_policy(default_peer, attr, extack); 378 + policy_peer = nla_get_u32(attr); 379 + err = netkit_check_policy(policy_peer, attr, extack); 373 380 if (err < 0) 374 381 return err; 375 382 } ··· 410 409 411 410 nk = netkit_priv(peer); 412 411 nk->primary = false; 413 - nk->policy = default_peer; 412 + nk->policy = policy_peer; 413 + nk->scrub = scrub_peer; 414 414 nk->mode = mode; 415 415 bpf_mprog_bundle_init(&nk->bundle); 416 416 ··· 436 434 437 435 nk = netkit_priv(dev); 438 436 
nk->primary = true; 439 - nk->policy = default_prim; 437 + nk->policy = policy_prim; 438 + nk->scrub = scrub_prim; 440 439 nk->mode = mode; 441 440 bpf_mprog_bundle_init(&nk->bundle); 442 441 ··· 877 874 return -EACCES; 878 875 } 879 876 877 + if (data[IFLA_NETKIT_SCRUB]) { 878 + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB], 879 + "netkit scrubbing cannot be changed after device creation"); 880 + return -EACCES; 881 + } 882 + 883 + if (data[IFLA_NETKIT_PEER_SCRUB]) { 884 + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB], 885 + "netkit scrubbing cannot be changed after device creation"); 886 + return -EACCES; 887 + } 888 + 880 889 if (data[IFLA_NETKIT_PEER_INFO]) { 881 890 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO], 882 891 "netkit peer info cannot be changed after device creation"); ··· 923 908 { 924 909 return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */ 925 910 nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */ 926 - nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ 911 + nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_SCRUB */ 912 + nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */ 927 913 nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */ 914 + nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ 928 915 0; 929 916 } 930 917 ··· 941 924 return -EMSGSIZE; 942 925 if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode)) 943 926 return -EMSGSIZE; 927 + if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub)) 928 + return -EMSGSIZE; 944 929 945 930 if (peer) { 946 931 nk = netkit_priv(peer); 947 932 if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy)) 933 + return -EMSGSIZE; 934 + if (nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub)) 948 935 return -EMSGSIZE; 949 936 } 950 937 ··· 957 936 958 937 static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { 959 938 [IFLA_NETKIT_PEER_INFO] = { .len = sizeof(struct ifinfomsg) }, 939 + [IFLA_NETKIT_MODE] = NLA_POLICY_MAX(NLA_U32, NETKIT_L3), 960 940 
[IFLA_NETKIT_POLICY] = { .type = NLA_U32 }, 961 - [IFLA_NETKIT_MODE] = { .type = NLA_U32 }, 962 941 [IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 }, 942 + [IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), 943 + [IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), 963 944 [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT, 964 945 .reject_message = "Primary attribute is read-only" }, 965 946 };

+7 -7

include/net/xdp_sock_drv.h

··· 126 126 if (likely(!xdp_buff_has_frags(xdp))) 127 127 goto out; 128 128 129 - list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { 130 - list_del(&pos->xskb_list_node); 129 + list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { 130 + list_del(&pos->list_node); 131 131 xp_free(pos); 132 132 } 133 133 ··· 140 140 { 141 141 struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp); 142 142 143 - list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list); 143 + list_add_tail(&frag->list_node, &frag->pool->xskb_list); 144 144 } 145 145 146 146 static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) ··· 150 150 struct xdp_buff_xsk *frag; 151 151 152 152 frag = list_first_entry_or_null(&xskb->pool->xskb_list, 153 - struct xdp_buff_xsk, xskb_list_node); 153 + struct xdp_buff_xsk, list_node); 154 154 if (frag) { 155 - list_del(&frag->xskb_list_node); 155 + list_del(&frag->list_node); 156 156 ret = &frag->xdp; 157 157 } 158 158 ··· 163 163 { 164 164 struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); 165 165 166 - list_del(&xskb->xskb_list_node); 166 + list_del(&xskb->list_node); 167 167 } 168 168 169 169 static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) ··· 172 172 struct xdp_buff_xsk *frag; 173 173 174 174 frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, 175 - xskb_list_node); 175 + list_node); 176 176 return &frag->xdp; 177 177 } 178 178

+13 -10

include/net/xsk_buff_pool.h

··· 28 28 dma_addr_t dma; 29 29 dma_addr_t frame_dma; 30 30 struct xsk_buff_pool *pool; 31 - u64 orig_addr; 32 - struct list_head free_list_node; 33 - struct list_head xskb_list_node; 31 + struct list_head list_node; 34 32 }; 35 33 36 34 #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) ··· 76 78 u32 chunk_size; 77 79 u32 chunk_shift; 78 80 u32 frame_len; 81 + u32 xdp_zc_max_segs; 79 82 u8 tx_metadata_len; /* inherited from umem */ 80 83 u8 cached_need_wakeup; 81 84 bool uses_need_wakeup; ··· 119 120 static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool, 120 121 u64 addr) 121 122 { 122 - xskb->orig_addr = addr; 123 123 xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; 124 124 } 125 125 ··· 220 222 xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; 221 223 } 222 224 223 - static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) 225 + static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb, 226 + struct xsk_buff_pool *pool) 224 227 { 225 - u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; 228 + u64 orig_addr = xskb->xdp.data - pool->addrs; 229 + u64 offset; 226 230 227 - offset += xskb->pool->headroom; 228 - if (!xskb->pool->unaligned) 229 - return xskb->orig_addr + offset; 230 - return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); 231 + if (!pool->unaligned) 232 + return orig_addr; 233 + 234 + offset = xskb->xdp.data - xskb->xdp.data_hard_start; 235 + orig_addr -= offset; 236 + offset += pool->headroom; 237 + return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); 231 238 } 232 239 233 240 static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool)

+15

include/uapi/linux/if_link.h

··· 1293 1293 NETKIT_L3, 1294 1294 }; 1295 1295 1296 + /* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to 1297 + * the BPF program if attached. This also means the latter can 1298 + * consume the two fields if they were populated earlier. 1299 + * 1300 + * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before 1301 + * invoking the attached BPF program when the peer device resides 1302 + * in a different network namespace. This is the default behavior. 1303 + */ 1304 + enum netkit_scrub { 1305 + NETKIT_SCRUB_NONE, 1306 + NETKIT_SCRUB_DEFAULT, 1307 + }; 1308 + 1296 1309 enum { 1297 1310 IFLA_NETKIT_UNSPEC, 1298 1311 IFLA_NETKIT_PEER_INFO, ··· 1313 1300 IFLA_NETKIT_POLICY, 1314 1301 IFLA_NETKIT_PEER_POLICY, 1315 1302 IFLA_NETKIT_MODE, 1303 + IFLA_NETKIT_SCRUB, 1304 + IFLA_NETKIT_PEER_SCRUB, 1316 1305 __IFLA_NETKIT_MAX, 1317 1306 }; 1318 1307 #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)

+13 -4

net/core/filter.c

··· 5138 5138 return net->net_cookie; 5139 5139 } 5140 5140 5141 + BPF_CALL_1(bpf_get_netns_cookie, struct sk_buff *, skb) 5142 + { 5143 + return __bpf_get_netns_cookie(skb && skb->sk ? skb->sk : NULL); 5144 + } 5145 + 5146 + static const struct bpf_func_proto bpf_get_netns_cookie_proto = { 5147 + .func = bpf_get_netns_cookie, 5148 + .ret_type = RET_INTEGER, 5149 + .arg1_type = ARG_PTR_TO_CTX_OR_NULL, 5150 + }; 5151 + 5141 5152 BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx) 5142 5153 { 5143 5154 return __bpf_get_netns_cookie(ctx); ··· 8216 8205 return &bpf_skb_under_cgroup_proto; 8217 8206 case BPF_FUNC_get_socket_cookie: 8218 8207 return &bpf_get_socket_cookie_proto; 8208 + case BPF_FUNC_get_netns_cookie: 8209 + return &bpf_get_netns_cookie_proto; 8219 8210 case BPF_FUNC_get_socket_uid: 8220 8211 return &bpf_get_socket_uid_proto; 8221 8212 case BPF_FUNC_fib_lookup: ··· 10249 10236 S, NS, F, NF, SIZE, OFF); \ 10250 10237 } \ 10251 10238 } while (0) 10252 - 10253 - #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \ 10254 - SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \ 10255 - S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF) 10256 10239 10257 10240 static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, 10258 10241 const struct bpf_insn *si,

+19 -19

net/xdp/xsk.c

··· 141 141 u64 addr; 142 142 int err; 143 143 144 - addr = xp_get_handle(xskb); 144 + addr = xp_get_handle(xskb, xskb->pool); 145 145 err = xskq_prod_reserve_desc(xs->rx, addr, len, flags); 146 146 if (err) { 147 147 xs->rx_queue_full++; ··· 171 171 return 0; 172 172 173 173 xskb_list = &xskb->pool->xskb_list; 174 - list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { 174 + list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { 175 175 if (list_is_singular(xskb_list)) 176 176 contd = 0; 177 177 len = pos->xdp.data_end - pos->xdp.data; 178 178 err = __xsk_rcv_zc(xs, pos, len, contd); 179 179 if (err) 180 180 goto err; 181 - list_del(&pos->xskb_list_node); 181 + list_del(&pos->list_node); 182 182 } 183 183 184 184 return 0; ··· 527 527 return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); 528 528 } 529 529 530 - static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr) 530 + static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr) 531 531 { 532 532 unsigned long flags; 533 533 int ret; 534 534 535 - spin_lock_irqsave(&xs->pool->cq_lock, flags); 536 - ret = xskq_prod_reserve_addr(xs->pool->cq, addr); 537 - spin_unlock_irqrestore(&xs->pool->cq_lock, flags); 535 + spin_lock_irqsave(&pool->cq_lock, flags); 536 + ret = xskq_prod_reserve_addr(pool->cq, addr); 537 + spin_unlock_irqrestore(&pool->cq_lock, flags); 538 538 539 539 return ret; 540 540 } 541 541 542 - static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n) 542 + static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n) 543 543 { 544 544 unsigned long flags; 545 545 546 - spin_lock_irqsave(&xs->pool->cq_lock, flags); 547 - xskq_prod_submit_n(xs->pool->cq, n); 548 - spin_unlock_irqrestore(&xs->pool->cq_lock, flags); 546 + spin_lock_irqsave(&pool->cq_lock, flags); 547 + xskq_prod_submit_n(pool->cq, n); 548 + spin_unlock_irqrestore(&pool->cq_lock, flags); 549 549 } 550 550 551 - static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n) 551 + 
static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n) 552 552 { 553 553 unsigned long flags; 554 554 555 - spin_lock_irqsave(&xs->pool->cq_lock, flags); 556 - xskq_prod_cancel_n(xs->pool->cq, n); 557 - spin_unlock_irqrestore(&xs->pool->cq_lock, flags); 555 + spin_lock_irqsave(&pool->cq_lock, flags); 556 + xskq_prod_cancel_n(pool->cq, n); 557 + spin_unlock_irqrestore(&pool->cq_lock, flags); 558 558 } 559 559 560 560 static u32 xsk_get_num_desc(struct sk_buff *skb) ··· 571 571 *compl->tx_timestamp = ktime_get_tai_fast_ns(); 572 572 } 573 573 574 - xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb)); 574 + xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb)); 575 575 sock_wfree(skb); 576 576 } 577 577 ··· 587 587 struct xdp_sock *xs = xdp_sk(skb->sk); 588 588 589 589 skb->destructor = sock_wfree; 590 - xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb)); 590 + xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb)); 591 591 /* Free skb without triggering the perf drop trace */ 592 592 consume_skb(skb); 593 593 xs->skb = NULL; ··· 765 765 xskq_cons_release(xs->tx); 766 766 } else { 767 767 /* Let application retry */ 768 - xsk_cq_cancel_locked(xs, 1); 768 + xsk_cq_cancel_locked(xs->pool, 1); 769 769 } 770 770 771 771 return ERR_PTR(err); ··· 802 802 * if there is space in it. This avoids having to implement 803 803 * any buffering in the Tx path. 804 804 */ 805 - if (xsk_cq_reserve_addr_locked(xs, desc.addr)) 805 + if (xsk_cq_reserve_addr_locked(xs->pool, desc.addr)) 806 806 goto out; 807 807 808 808 skb = xsk_build_skb(xs, &desc);

+29 -25

net/xdp/xsk_buff_pool.c

··· 101 101 xskb = &pool->heads[i]; 102 102 xskb->pool = pool; 103 103 xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; 104 - INIT_LIST_HEAD(&xskb->free_list_node); 105 - INIT_LIST_HEAD(&xskb->xskb_list_node); 104 + INIT_LIST_HEAD(&xskb->list_node); 106 105 if (pool->unaligned) 107 106 pool->free_heads[i] = xskb; 108 107 else ··· 229 230 goto err_unreg_xsk; 230 231 } 231 232 pool->umem->zc = true; 233 + pool->xdp_zc_max_segs = netdev->xdp_zc_max_segs; 232 234 return 0; 233 235 234 236 err_unreg_xsk: ··· 417 417 418 418 for (i = 0; i < pool->heads_cnt; i++) { 419 419 struct xdp_buff_xsk *xskb = &pool->heads[i]; 420 + u64 orig_addr; 420 421 421 - xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); 422 + orig_addr = xskb->xdp.data_hard_start - pool->addrs - pool->headroom; 423 + xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, orig_addr); 422 424 } 423 425 } 424 426 ··· 503 501 return *addr < pool->addrs_cnt; 504 502 } 505 503 504 + static struct xdp_buff_xsk *xp_get_xskb(struct xsk_buff_pool *pool, u64 addr) 505 + { 506 + struct xdp_buff_xsk *xskb; 507 + 508 + if (pool->unaligned) { 509 + xskb = pool->free_heads[--pool->free_heads_cnt]; 510 + xp_init_xskb_addr(xskb, pool, addr); 511 + if (pool->dma_pages) 512 + xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); 513 + } else { 514 + xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; 515 + } 516 + 517 + return xskb; 518 + } 519 + 506 520 static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) 507 521 { 508 522 struct xdp_buff_xsk *xskb; ··· 544 526 break; 545 527 } 546 528 547 - if (pool->unaligned) { 548 - xskb = pool->free_heads[--pool->free_heads_cnt]; 549 - xp_init_xskb_addr(xskb, pool, addr); 550 - if (pool->dma_pages) 551 - xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); 552 - } else { 553 - xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; 554 - } 529 + xskb = xp_get_xskb(pool, addr); 555 530 556 531 xskq_cons_release(pool->fq); 557 532 return xskb; ··· 561 
550 } else { 562 551 pool->free_list_cnt--; 563 552 xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, 564 - free_list_node); 565 - list_del_init(&xskb->free_list_node); 553 + list_node); 554 + list_del_init(&xskb->list_node); 566 555 } 567 556 568 557 xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; ··· 602 591 continue; 603 592 } 604 593 605 - if (pool->unaligned) { 606 - xskb = pool->free_heads[--pool->free_heads_cnt]; 607 - xp_init_xskb_addr(xskb, pool, addr); 608 - if (pool->dma_pages) 609 - xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); 610 - } else { 611 - xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; 612 - } 594 + xskb = xp_get_xskb(pool, addr); 613 595 614 596 *xdp = &xskb->xdp; 615 597 xdp++; ··· 621 617 622 618 i = nb_entries; 623 619 while (i--) { 624 - xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node); 625 - list_del_init(&xskb->free_list_node); 620 + xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node); 621 + list_del_init(&xskb->list_node); 626 622 627 623 *xdp = &xskb->xdp; 628 624 xdp++; ··· 692 688 693 689 void xp_free(struct xdp_buff_xsk *xskb) 694 690 { 695 - if (!list_empty(&xskb->free_list_node)) 691 + if (!list_empty(&xskb->list_node)) 696 692 return; 697 693 698 694 xskb->pool->free_list_cnt++; 699 - list_add(&xskb->free_list_node, &xskb->pool->free_list); 695 + list_add(&xskb->list_node, &xskb->pool->free_list); 700 696 } 701 697 EXPORT_SYMBOL(xp_free); 702 698

+1 -1

net/xdp/xsk_queue.h

··· 260 260 nr_frags = 0; 261 261 } else { 262 262 nr_frags++; 263 - if (nr_frags == pool->netdev->xdp_zc_max_segs) { 263 + if (nr_frags == pool->xdp_zc_max_segs) { 264 264 nr_frags = 0; 265 265 break; 266 266 }

+552 -1

tools/include/uapi/linux/if_link.h

··· 462 462 463 463 /* Bridge section */ 464 464 465 + /** 466 + * DOC: Bridge enum definition 467 + * 468 + * Please *note* that the timer values in the following section are expected 469 + * in clock_t format, which is seconds multiplied by USER_HZ (generally 470 + * defined as 100). 471 + * 472 + * @IFLA_BR_FORWARD_DELAY 473 + * The bridge forwarding delay is the time spent in LISTENING state 474 + * (before moving to LEARNING) and in LEARNING state (before moving 475 + * to FORWARDING). Only relevant if STP is enabled. 476 + * 477 + * The valid values are between (2 * USER_HZ) and (30 * USER_HZ). 478 + * The default value is (15 * USER_HZ). 479 + * 480 + * @IFLA_BR_HELLO_TIME 481 + * The time between hello packets sent by the bridge, when it is a root 482 + * bridge or a designated bridge. Only relevant if STP is enabled. 483 + * 484 + * The valid values are between (1 * USER_HZ) and (10 * USER_HZ). 485 + * The default value is (2 * USER_HZ). 486 + * 487 + * @IFLA_BR_MAX_AGE 488 + * The hello packet timeout is the time until another bridge in the 489 + * spanning tree is assumed to be dead, after reception of its last hello 490 + * message. Only relevant if STP is enabled. 491 + * 492 + * The valid values are between (6 * USER_HZ) and (40 * USER_HZ). 493 + * The default value is (20 * USER_HZ). 494 + * 495 + * @IFLA_BR_AGEING_TIME 496 + * Configure the bridge's FDB entries aging time. It is the time a MAC 497 + * address will be kept in the FDB after a packet has been received from 498 + * that address. After this time has passed, entries are cleaned up. 499 + * Allow values outside the 802.1 standard specification for special cases: 500 + * 501 + * * 0 - entry never ages (all permanent) 502 + * * 1 - entry disappears (no persistence) 503 + * 504 + * The default value is (300 * USER_HZ). 505 + * 506 + * @IFLA_BR_STP_STATE 507 + * Turn spanning tree protocol on (*IFLA_BR_STP_STATE* > 0) or off 508 + * (*IFLA_BR_STP_STATE* == 0) for this bridge. 
509 + * 510 + * The default value is 0 (disabled). 511 + * 512 + * @IFLA_BR_PRIORITY 513 + * Set this bridge's spanning tree priority, used during STP root bridge 514 + * election. 515 + * 516 + * The valid values are between 0 and 65535. 517 + * 518 + * @IFLA_BR_VLAN_FILTERING 519 + * Turn VLAN filtering on (*IFLA_BR_VLAN_FILTERING* > 0) or off 520 + * (*IFLA_BR_VLAN_FILTERING* == 0). When disabled, the bridge will not 521 + * consider the VLAN tag when handling packets. 522 + * 523 + * The default value is 0 (disabled). 524 + * 525 + * @IFLA_BR_VLAN_PROTOCOL 526 + * Set the protocol used for VLAN filtering. 527 + * 528 + * The valid values are 0x8100(802.1Q) or 0x88A8(802.1AD). The default value 529 + * is 0x8100(802.1Q). 530 + * 531 + * @IFLA_BR_GROUP_FWD_MASK 532 + * The group forwarding mask. This is the bitmask that is applied to 533 + * decide whether to forward incoming frames destined to link-local 534 + * addresses (of the form 01:80:C2:00:00:0X). 535 + * 536 + * The default value is 0, which means the bridge does not forward any 537 + * link-local frames coming on this port. 538 + * 539 + * @IFLA_BR_ROOT_ID 540 + * The bridge root id, read only. 541 + * 542 + * @IFLA_BR_BRIDGE_ID 543 + * The bridge id, read only. 544 + * 545 + * @IFLA_BR_ROOT_PORT 546 + * The bridge root port, read only. 547 + * 548 + * @IFLA_BR_ROOT_PATH_COST 549 + * The bridge root path cost, read only. 550 + * 551 + * @IFLA_BR_TOPOLOGY_CHANGE 552 + * The bridge topology change, read only. 553 + * 554 + * @IFLA_BR_TOPOLOGY_CHANGE_DETECTED 555 + * The bridge topology change detected, read only. 556 + * 557 + * @IFLA_BR_HELLO_TIMER 558 + * The bridge hello timer, read only. 559 + * 560 + * @IFLA_BR_TCN_TIMER 561 + * The bridge tcn timer, read only. 562 + * 563 + * @IFLA_BR_TOPOLOGY_CHANGE_TIMER 564 + * The bridge topology change timer, read only. 565 + * 566 + * @IFLA_BR_GC_TIMER 567 + * The bridge gc timer, read only. 
568 + * 569 + * @IFLA_BR_GROUP_ADDR 570 + * Set the MAC address of the multicast group this bridge uses for STP. 571 + * The address must be a link-local address in standard Ethernet MAC address 572 + * format. It is an address of the form 01:80:C2:00:00:0X, with X in [0, 4..f]. 573 + * 574 + * The default value is 0. 575 + * 576 + * @IFLA_BR_FDB_FLUSH 577 + * Flush bridge's fdb dynamic entries. 578 + * 579 + * @IFLA_BR_MCAST_ROUTER 580 + * Set bridge's multicast router if IGMP snooping is enabled. 581 + * The valid values are: 582 + * 583 + * * 0 - disabled. 584 + * * 1 - automatic (queried). 585 + * * 2 - permanently enabled. 586 + * 587 + * The default value is 1. 588 + * 589 + * @IFLA_BR_MCAST_SNOOPING 590 + * Turn multicast snooping on (*IFLA_BR_MCAST_SNOOPING* > 0) or off 591 + * (*IFLA_BR_MCAST_SNOOPING* == 0). 592 + * 593 + * The default value is 1. 594 + * 595 + * @IFLA_BR_MCAST_QUERY_USE_IFADDR 596 + * If enabled use the bridge's own IP address as source address for IGMP 597 + * queries (*IFLA_BR_MCAST_QUERY_USE_IFADDR* > 0) or the default of 0.0.0.0 598 + * (*IFLA_BR_MCAST_QUERY_USE_IFADDR* == 0). 599 + * 600 + * The default value is 0 (disabled). 601 + * 602 + * @IFLA_BR_MCAST_QUERIER 603 + * Enable (*IFLA_BR_MCAST_QUERIER* > 0) or disable 604 + * (*IFLA_BR_MCAST_QUERIER* == 0) IGMP querier, i.e. sending of multicast 605 + * queries by the bridge. 606 + * 607 + * The default value is 0 (disabled). 608 + * 609 + * @IFLA_BR_MCAST_HASH_ELASTICITY 610 + * Set multicast database hash elasticity. It is the maximum chain length in 611 + * the multicast hash table. This attribute is *deprecated* and the value 612 + * is always 16. 613 + * 614 + * @IFLA_BR_MCAST_HASH_MAX 615 + * Set maximum size of the multicast hash table. 616 + * 617 + * The default value is 4096; the value must be a power of 2. 
618 + * 619 + * @IFLA_BR_MCAST_LAST_MEMBER_CNT 620 + * The Last Member Query Count is the number of Group-Specific Queries 621 + * sent before the router assumes there are no local members. The Last 622 + * Member Query Count is also the number of Group-and-Source-Specific 623 + * Queries sent before the router assumes there are no listeners for a 624 + * particular source. 625 + * 626 + * The default value is 2. 627 + * 628 + * @IFLA_BR_MCAST_STARTUP_QUERY_CNT 629 + * The Startup Query Count is the number of Queries sent out on startup, 630 + * separated by the Startup Query Interval. 631 + * 632 + * The default value is 2. 633 + * 634 + * @IFLA_BR_MCAST_LAST_MEMBER_INTVL 635 + * The Last Member Query Interval is the Max Response Time inserted into 636 + * Group-Specific Queries sent in response to Leave Group messages, and 637 + * is also the amount of time between Group-Specific Query messages. 638 + * 639 + * The default value is (1 * USER_HZ). 640 + * 641 + * @IFLA_BR_MCAST_MEMBERSHIP_INTVL 642 + * The interval after which the bridge will leave a group, if no membership 643 + * reports for this group are received. 644 + * 645 + * The default value is (260 * USER_HZ). 646 + * 647 + * @IFLA_BR_MCAST_QUERIER_INTVL 648 + * The interval between queries sent by other routers. If no queries are 649 + * seen after this delay has passed, the bridge will start to send its own 650 + * queries (as if *IFLA_BR_MCAST_QUERIER* was enabled). 651 + * 652 + * The default value is (255 * USER_HZ). 653 + * 654 + * @IFLA_BR_MCAST_QUERY_INTVL 655 + * The Query Interval is the interval between General Queries sent by 656 + * the Querier. 657 + * 658 + * The default value is (125 * USER_HZ). The minimum value is (1 * USER_HZ). 659 + * 660 + * @IFLA_BR_MCAST_QUERY_RESPONSE_INTVL 661 + * The Max Response Time used to calculate the Max Resp Code inserted 662 + * into the periodic General Queries. 663 + * 664 + * The default value is (10 * USER_HZ). 
665 + * 666 + * @IFLA_BR_MCAST_STARTUP_QUERY_INTVL 667 + * The interval between queries in the startup phase. 668 + * 669 + * The default value is (125 * USER_HZ) / 4. The minimum value is (1 * USER_HZ). 670 + * 671 + * @IFLA_BR_NF_CALL_IPTABLES 672 + * Enable (*NF_CALL_IPTABLES* > 0) or disable (*NF_CALL_IPTABLES* == 0) 673 + * iptables hooks on the bridge. 674 + * 675 + * The default value is 0 (disabled). 676 + * 677 + * @IFLA_BR_NF_CALL_IP6TABLES 678 + * Enable (*NF_CALL_IP6TABLES* > 0) or disable (*NF_CALL_IP6TABLES* == 0) 679 + * ip6tables hooks on the bridge. 680 + * 681 + * The default value is 0 (disabled). 682 + * 683 + * @IFLA_BR_NF_CALL_ARPTABLES 684 + * Enable (*NF_CALL_ARPTABLES* > 0) or disable (*NF_CALL_ARPTABLES* == 0) 685 + * arptables hooks on the bridge. 686 + * 687 + * The default value is 0 (disabled). 688 + * 689 + * @IFLA_BR_VLAN_DEFAULT_PVID 690 + * VLAN ID applied to untagged and priority-tagged incoming packets. 691 + * 692 + * The default value is 1. Setting to the special value 0 makes all ports of 693 + * this bridge not have a PVID by default, which means that they will 694 + * not accept VLAN-untagged traffic. 695 + * 696 + * @IFLA_BR_PAD 697 + * Bridge attribute padding type for netlink message. 698 + * 699 + * @IFLA_BR_VLAN_STATS_ENABLED 700 + * Enable (*IFLA_BR_VLAN_STATS_ENABLED* == 1) or disable 701 + * (*IFLA_BR_VLAN_STATS_ENABLED* == 0) per-VLAN stats accounting. 702 + * 703 + * The default value is 0 (disabled). 704 + * 705 + * @IFLA_BR_MCAST_STATS_ENABLED 706 + * Enable (*IFLA_BR_MCAST_STATS_ENABLED* > 0) or disable 707 + * (*IFLA_BR_MCAST_STATS_ENABLED* == 0) multicast (IGMP/MLD) stats 708 + * accounting. 709 + * 710 + * The default value is 0 (disabled). 711 + * 712 + * @IFLA_BR_MCAST_IGMP_VERSION 713 + * Set the IGMP version. 714 + * 715 + * The valid values are 2 and 3. The default value is 2. 716 + * 717 + * @IFLA_BR_MCAST_MLD_VERSION 718 + * Set the MLD version. 719 + * 720 + * The valid values are 1 and 2. 
The default value is 1. 721 + * 722 + * @IFLA_BR_VLAN_STATS_PER_PORT 723 + * Enable (*IFLA_BR_VLAN_STATS_PER_PORT* == 1) or disable 724 + * (*IFLA_BR_VLAN_STATS_PER_PORT* == 0) per-VLAN per-port stats accounting. 725 + * Can be changed only when there are no port VLANs configured. 726 + * 727 + * The default value is 0 (disabled). 728 + * 729 + * @IFLA_BR_MULTI_BOOLOPT 730 + * The multi_boolopt is used to control new boolean options to avoid adding 731 + * new netlink attributes. You can look at ``enum br_boolopt_id`` for those 732 + * options. 733 + * 734 + * @IFLA_BR_MCAST_QUERIER_STATE 735 + * Bridge mcast querier states, read only. 736 + * 737 + * @IFLA_BR_FDB_N_LEARNED 738 + * The number of dynamically learned FDB entries for the current bridge, 739 + * read only. 740 + * 741 + * @IFLA_BR_FDB_MAX_LEARNED 742 + * Set the number of max dynamically learned FDB entries for the current 743 + * bridge. 744 + */ 465 745 enum { 466 746 IFLA_BR_UNSPEC, 467 747 IFLA_BR_FORWARD_DELAY, ··· 791 511 IFLA_BR_VLAN_STATS_PER_PORT, 792 512 IFLA_BR_MULTI_BOOLOPT, 793 513 IFLA_BR_MCAST_QUERIER_STATE, 514 + IFLA_BR_FDB_N_LEARNED, 515 + IFLA_BR_FDB_MAX_LEARNED, 794 516 __IFLA_BR_MAX, 795 517 }; 796 518 ··· 803 521 __u8 addr[6]; /* ETH_ALEN */ 804 522 }; 805 523 524 + /** 525 + * DOC: Bridge mode enum definition 526 + * 527 + * @BRIDGE_MODE_HAIRPIN 528 + * Controls whether traffic may be sent back out of the port on which it 529 + * was received. This option is also called reflective relay mode, and is 530 + * used to support basic VEPA (Virtual Ethernet Port Aggregator) 531 + * capabilities. By default, this flag is turned off and the bridge will 532 + * not forward traffic back out of the receiving port. 533 + */ 806 534 enum { 807 535 BRIDGE_MODE_UNSPEC, 808 536 BRIDGE_MODE_HAIRPIN, 809 537 }; 810 538 539 + /** 540 + * DOC: Bridge port enum definition 541 + * 542 + * @IFLA_BRPORT_STATE 543 + * The operation state of the port. Here are the valid values. 
544 + * 545 + * * 0 - port is in STP *DISABLED* state. Make this port completely 546 + * inactive for STP. This is also called BPDU filter and could be used 547 + * to disable STP on an untrusted port, like a leaf virtual device. 548 + * The traffic forwarding is also stopped on this port. 549 + * * 1 - port is in STP *LISTENING* state. Only valid if STP is enabled 550 + * on the bridge. In this state the port listens for STP BPDUs and 551 + * drops all other traffic frames. 552 + * * 2 - port is in STP *LEARNING* state. Only valid if STP is enabled on 553 + * the bridge. In this state the port will accept traffic only for the 554 + * purpose of updating MAC address tables. 555 + * * 3 - port is in STP *FORWARDING* state. Port is fully active. 556 + * * 4 - port is in STP *BLOCKING* state. Only valid if STP is enabled on 557 + * the bridge. This state is used during the STP election process. 558 + * In this state, port will only process STP BPDUs. 559 + * 560 + * @IFLA_BRPORT_PRIORITY 561 + * The STP port priority. The valid values are between 0 and 255. 562 + * 563 + * @IFLA_BRPORT_COST 564 + * The STP path cost of the port. The valid values are between 1 and 65535. 565 + * 566 + * @IFLA_BRPORT_MODE 567 + * Set the bridge port mode. See *BRIDGE_MODE_HAIRPIN* for more details. 568 + * 569 + * @IFLA_BRPORT_GUARD 570 + * Controls whether STP BPDUs will be processed by the bridge port. By 571 + * default, the flag is turned off to allow BPDU processing. Turning this 572 + * flag on will disable the bridge port if a STP BPDU packet is received. 573 + * 574 + * If the bridge has Spanning Tree enabled, hostile devices on the network 575 + * may send BPDU on a port and cause network failure. Setting *guard on* 576 + * will detect and stop this by disabling the port. The port will be 577 + * restarted if the link is brought down, or removed and reattached. 578 + * 579 + * @IFLA_BRPORT_PROTECT 580 + * Controls whether a given port is allowed to become a root port or not. 
581 + * Only used when STP is enabled on the bridge. By default the flag is off. 582 + * 583 + * This feature is also called root port guard. If BPDU is received from a 584 + * leaf (edge) port, it should not be elected as root port. This could 585 + * be used if using STP on a bridge and the downstream bridges are not fully 586 + * trusted; this prevents a hostile guest from rerouting traffic. 587 + * 588 + * @IFLA_BRPORT_FAST_LEAVE 589 + * This flag allows the bridge to immediately stop multicast traffic 590 + * forwarding on a port that receives an IGMP Leave message. It is only used 591 + * when IGMP snooping is enabled on the bridge. By default the flag is off. 592 + * 593 + * @IFLA_BRPORT_LEARNING 594 + * Controls whether a given port will learn *source* MAC addresses from 595 + * received traffic or not. Also controls whether dynamic FDB entries 596 + * (which can also be added by software) will be refreshed by incoming 597 + * traffic. By default this flag is on. 598 + * 599 + * @IFLA_BRPORT_UNICAST_FLOOD 600 + * Controls whether unicast traffic for which there is no FDB entry will 601 + * be flooded towards this port. By default this flag is on. 602 + * 603 + * @IFLA_BRPORT_PROXYARP 604 + * Enable proxy ARP on this port. 605 + * 606 + * @IFLA_BRPORT_LEARNING_SYNC 607 + * Controls whether a given port will sync MAC addresses learned on device 608 + * port to bridge FDB. 609 + * 610 + * @IFLA_BRPORT_PROXYARP_WIFI 611 + * Enable proxy ARP on this port which meets extended requirements by 612 + * IEEE 802.11 and Hotspot 2.0 specifications. 
613 + * 614 + * @IFLA_BRPORT_ROOT_ID 615 + * 616 + * @IFLA_BRPORT_BRIDGE_ID 617 + * 618 + * @IFLA_BRPORT_DESIGNATED_PORT 619 + * 620 + * @IFLA_BRPORT_DESIGNATED_COST 621 + * 622 + * @IFLA_BRPORT_ID 623 + * 624 + * @IFLA_BRPORT_NO 625 + * 626 + * @IFLA_BRPORT_TOPOLOGY_CHANGE_ACK 627 + * 628 + * @IFLA_BRPORT_CONFIG_PENDING 629 + * 630 + * @IFLA_BRPORT_MESSAGE_AGE_TIMER 631 + * 632 + * @IFLA_BRPORT_FORWARD_DELAY_TIMER 633 + * 634 + * @IFLA_BRPORT_HOLD_TIMER 635 + * 636 + * @IFLA_BRPORT_FLUSH 637 + * Flush bridge ports' fdb dynamic entries. 638 + * 639 + * @IFLA_BRPORT_MULTICAST_ROUTER 640 + * Configure the port's multicast router presence. A port with 641 + * a multicast router will receive all multicast traffic. 642 + * The valid values are: 643 + * 644 + * * 0 disable multicast routers on this port 645 + * * 1 let the system detect the presence of routers (default) 646 + * * 2 permanently enable multicast traffic forwarding on this port 647 + * * 3 enable multicast routers temporarily on this port, not depending 648 + * on incoming queries. 649 + * 650 + * @IFLA_BRPORT_PAD 651 + * 652 + * @IFLA_BRPORT_MCAST_FLOOD 653 + * Controls whether a given port will flood multicast traffic for which 654 + * there is no MDB entry. By default this flag is on. 655 + * 656 + * @IFLA_BRPORT_MCAST_TO_UCAST 657 + * Controls whether a given port will replicate packets using unicast 658 + * instead of multicast. By default this flag is off. 659 + * 660 + * This is done by copying the packet per host and changing the multicast 661 + * destination MAC to a unicast one accordingly. 662 + * 663 + * *mcast_to_unicast* works on top of the multicast snooping feature of the 664 + * bridge. Which means unicast copies are only delivered to hosts which 665 + * are interested in unicast and signaled this via IGMP/MLD reports previously. 
666 + * 667 + * This feature is intended for interface types which have a more reliable 668 + * and/or efficient way to deliver unicast packets than broadcast ones 669 + * (e.g. WiFi). 670 + * 671 + * However, it should only be enabled on interfaces where no IGMPv2/MLDv1 672 + * report suppression takes place. IGMP/MLD report suppression issue is 673 + * usually overcome by the network daemon (supplicant) enabling AP isolation 674 + * and by that separating all STAs. 675 + * 676 + * Delivery of STA-to-STA IP multicast is made possible again by enabling 677 + * and utilizing the bridge hairpin mode, which considers the incoming port 678 + * as a potential outgoing port, too (see *BRIDGE_MODE_HAIRPIN* option). 679 + * Hairpin mode is performed after multicast snooping, so reports are 680 + * only delivered to STAs running a multicast router. 681 + * 682 + * @IFLA_BRPORT_VLAN_TUNNEL 683 + * Controls whether vlan to tunnel mapping is enabled on the port. 684 + * By default this flag is off. 685 + * 686 + * @IFLA_BRPORT_BCAST_FLOOD 687 + * Controls flooding of broadcast traffic on the given port. By default 688 + * this flag is on. 689 + * 690 + * @IFLA_BRPORT_GROUP_FWD_MASK 691 + * Set the group forward mask. This is a bitmask that is applied to 692 + * decide whether to forward incoming frames destined to link-local 693 + * addresses. The addresses are of the form 01:80:C2:00:00:0X (defaults 694 + * to 0, which means the bridge does not forward any link-local frames 695 + * coming on this port). 696 + * 697 + * @IFLA_BRPORT_NEIGH_SUPPRESS 698 + * Controls whether neighbor discovery (arp and nd) proxy and suppression 699 + * is enabled on the port. By default this flag is off. 700 + * 701 + * @IFLA_BRPORT_ISOLATED 702 + * Controls whether a given port will be isolated, which means it will be 703 + * able to communicate with non-isolated ports only. By default this 704 + * flag is off. 705 + * 706 + * @IFLA_BRPORT_BACKUP_PORT 707 + * Set a backup port.
If the port loses carrier all traffic will be 708 + * redirected to the configured backup port. Set the value to 0 to disable 709 + * it. 710 + * 711 + * @IFLA_BRPORT_MRP_RING_OPEN 712 + * 713 + * @IFLA_BRPORT_MRP_IN_OPEN 714 + * 715 + * @IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT 716 + * The per-port limit on the number of EHT hosts. The default value is 512. 717 + * Setting to 0 is not allowed. 718 + * 719 + * @IFLA_BRPORT_MCAST_EHT_HOSTS_CNT 720 + * The current number of tracked hosts, read only. 721 + * 722 + * @IFLA_BRPORT_LOCKED 723 + * Controls whether a port will be locked, meaning that hosts behind the 724 + * port will not be able to communicate through the port unless an FDB 725 + * entry with the unit's MAC address is in the FDB. The common use case is 726 + * that hosts are allowed access through authentication with the IEEE 802.1X 727 + * protocol or based on whitelists. By default this flag is off. 728 + * 729 + * Please note that secure 802.1X deployments should always use the 730 + * *BR_BOOLOPT_NO_LL_LEARN* flag, to not permit the bridge to populate its 731 + * FDB based on link-local (EAPOL) traffic received on the port. 732 + * 733 + * @IFLA_BRPORT_MAB 734 + * Controls whether a port will use MAC Authentication Bypass (MAB), a 735 + * technique through which select MAC addresses may be allowed on a locked 736 + * port, without using 802.1X authentication. Packets with an unknown source 737 + * MAC address generate a "locked" FDB entry on the incoming bridge port. 738 + * The common use case is for user space to react to these bridge FDB 739 + * notifications and optionally replace the locked FDB entry with a normal 740 + * one, allowing traffic to pass for whitelisted MAC addresses. 741 + * 742 + * Setting this flag also requires *IFLA_BRPORT_LOCKED* and 743 + * *IFLA_BRPORT_LEARNING*.
*IFLA_BRPORT_LOCKED* ensures that unauthorized 744 + * data packets are dropped, and *IFLA_BRPORT_LEARNING* allows the dynamic 745 + * FDB entries installed by user space (as replacements for the locked FDB 746 + * entries) to be refreshed and/or aged out. 747 + * 748 + * @IFLA_BRPORT_MCAST_N_GROUPS 749 + * 750 + * @IFLA_BRPORT_MCAST_MAX_GROUPS 751 + * Sets the maximum number of MDB entries that can be registered for a 752 + * given port. Attempts to register more MDB entries at the port than this 753 + * limit allows will be rejected, whether they are done through netlink 754 + * (e.g. the bridge tool), or IGMP or MLD membership reports. Setting a 755 + * limit of 0 disables the limit. The default value is 0. 756 + * 757 + * @IFLA_BRPORT_NEIGH_VLAN_SUPPRESS 758 + * Controls whether neighbor discovery (arp and nd) proxy and suppression is 759 + * enabled for a given port. By default this flag is off. 760 + * 761 + * Note that this option only takes effect when *IFLA_BRPORT_NEIGH_SUPPRESS* 762 + * is enabled for a given port. 763 + * 764 + * @IFLA_BRPORT_BACKUP_NHID 765 + * The FDB nexthop object ID to attach to packets being redirected to a 766 + * backup port that has VLAN tunnel mapping enabled (via the 767 + * *IFLA_BRPORT_VLAN_TUNNEL* option). Setting a value of 0 (default) has 768 + * the effect of not attaching any ID. 769 + */ 811 770 enum { 812 771 IFLA_BRPORT_UNSPEC, 813 772 IFLA_BRPORT_STATE, /* Spanning tree state */ ··· 1293 770 NETKIT_L3, 1294 771 }; 1295 772 773 + /* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to 774 + * the BPF program if attached. This also means the latter can 775 + * consume the two fields if they were populated earlier. 776 + * 777 + * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before 778 + * invoking the attached BPF program when the peer device resides 779 + * in a different network namespace. This is the default behavior. 
780 + */ 781 + enum netkit_scrub { 782 + NETKIT_SCRUB_NONE, 783 + NETKIT_SCRUB_DEFAULT, 784 + }; 785 + 1296 786 enum { 1297 787 IFLA_NETKIT_UNSPEC, 1298 788 IFLA_NETKIT_PEER_INFO, ··· 1313 777 IFLA_NETKIT_POLICY, 1314 778 IFLA_NETKIT_PEER_POLICY, 1315 779 IFLA_NETKIT_MODE, 780 + IFLA_NETKIT_SCRUB, 781 + IFLA_NETKIT_PEER_SCRUB, 1316 782 __IFLA_NETKIT_MAX, 1317 783 }; 1318 784 #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) ··· 1393 855 IFLA_VXLAN_DF, 1394 856 IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ 1395 857 IFLA_VXLAN_LOCALBYPASS, 858 + IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ 1396 859 __IFLA_VXLAN_MAX 1397 860 }; 1398 861 #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) ··· 1409 870 VXLAN_DF_INHERIT, 1410 871 __VXLAN_DF_END, 1411 872 VXLAN_DF_MAX = __VXLAN_DF_END - 1, 873 + }; 874 + 875 + enum ifla_vxlan_label_policy { 876 + VXLAN_LABEL_FIXED = 0, 877 + VXLAN_LABEL_INHERIT = 1, 878 + __VXLAN_LABEL_END, 879 + VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1, 1412 880 }; 1413 881 1414 882 /* GENEVE section */ ··· 1482 936 IFLA_GTP_ROLE, 1483 937 IFLA_GTP_CREATE_SOCKETS, 1484 938 IFLA_GTP_RESTART_COUNT, 939 + IFLA_GTP_LOCAL, 940 + IFLA_GTP_LOCAL6, 1485 941 __IFLA_GTP_MAX, 1486 942 }; 1487 943 #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) ··· 1789 1241 IFLA_HSR_PROTOCOL, /* Indicate different protocol than 1790 1242 * HSR. For example PRP. 1791 1243 */ 1244 + IFLA_HSR_INTERLINK, /* HSR interlink network device */ 1792 1245 __IFLA_HSR_MAX, 1793 1246 }; 1794 1247 ··· 1967 1418 1968 1419 enum { 1969 1420 IFLA_DSA_UNSPEC, 1970 - IFLA_DSA_MASTER, 1421 + IFLA_DSA_CONDUIT, 1422 + /* Deprecated, use IFLA_DSA_CONDUIT instead */ 1423 + IFLA_DSA_MASTER = IFLA_DSA_CONDUIT, 1971 1424 __IFLA_DSA_MAX, 1972 1425 }; 1973 1426

+1

tools/testing/selftests/bpf/network_helpers.h

··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 #ifndef __NETWORK_HELPERS_H 3 3 #define __NETWORK_HELPERS_H 4 + #include <arpa/inet.h> 4 5 #include <sys/socket.h> 5 6 #include <sys/types.h> 6 7 #include <linux/types.h>

+121

tools/testing/selftests/bpf/prog_tests/mptcp.c

··· 5 5 #include <linux/const.h> 6 6 #include <netinet/in.h> 7 7 #include <test_progs.h> 8 + #include <unistd.h> 8 9 #include "cgroup_helpers.h" 9 10 #include "network_helpers.h" 10 11 #include "mptcp_sock.skel.h" 11 12 #include "mptcpify.skel.h" 13 + #include "mptcp_subflow.skel.h" 12 14 13 15 #define NS_TEST "mptcp_ns" 16 + #define ADDR_1 "10.0.1.1" 17 + #define ADDR_2 "10.0.1.2" 18 + #define PORT_1 10001 14 19 15 20 #ifndef IPPROTO_MPTCP 16 21 #define IPPROTO_MPTCP 262 ··· 340 335 close(cgroup_fd); 341 336 } 342 337 338 + static int endpoint_init(char *flags) 339 + { 340 + SYS(fail, "ip -net %s link add veth1 type veth peer name veth2", NS_TEST); 341 + SYS(fail, "ip -net %s addr add %s/24 dev veth1", NS_TEST, ADDR_1); 342 + SYS(fail, "ip -net %s link set dev veth1 up", NS_TEST); 343 + SYS(fail, "ip -net %s addr add %s/24 dev veth2", NS_TEST, ADDR_2); 344 + SYS(fail, "ip -net %s link set dev veth2 up", NS_TEST); 345 + if (SYS_NOFAIL("ip -net %s mptcp endpoint add %s %s", NS_TEST, ADDR_2, flags)) { 346 + printf("'ip mptcp' not supported, skip this test.\n"); 347 + test__skip(); 348 + goto fail; 349 + } 350 + 351 + return 0; 352 + fail: 353 + return -1; 354 + } 355 + 356 + static void wait_for_new_subflows(int fd) 357 + { 358 + socklen_t len; 359 + u8 subflows; 360 + int err, i; 361 + 362 + len = sizeof(subflows); 363 + /* Wait max 5 sec for new subflows to be created */ 364 + for (i = 0; i < 50; i++) { 365 + err = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &subflows, &len); 366 + if (!err && subflows > 0) 367 + break; 368 + 369 + usleep(100000); /* 0.1s */ 370 + } 371 + } 372 + 373 + static void run_subflow(void) 374 + { 375 + int server_fd, client_fd, err; 376 + char new[TCP_CA_NAME_MAX]; 377 + char cc[TCP_CA_NAME_MAX]; 378 + unsigned int mark; 379 + socklen_t len; 380 + 381 + server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0); 382 + if (!ASSERT_OK_FD(server_fd, "start_mptcp_server")) 383 + return; 384 + 385 + client_fd = connect_to_fd(server_fd, 0); 386 + if 
(!ASSERT_OK_FD(client_fd, "connect_to_fd")) 387 + goto close_server; 388 + 389 + send_byte(client_fd); 390 + wait_for_new_subflows(client_fd); 391 + 392 + len = sizeof(mark); 393 + err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len); 394 + if (ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)")) 395 + ASSERT_EQ(mark, 0, "mark"); 396 + 397 + len = sizeof(new); 398 + err = getsockopt(client_fd, SOL_TCP, TCP_CONGESTION, new, &len); 399 + if (ASSERT_OK(err, "getsockopt(client_fd, TCP_CONGESTION)")) { 400 + get_msk_ca_name(cc); 401 + ASSERT_STREQ(new, cc, "cc"); 402 + } 403 + 404 + close(client_fd); 405 + close_server: 406 + close(server_fd); 407 + } 408 + 409 + static void test_subflow(void) 410 + { 411 + struct mptcp_subflow *skel; 412 + struct nstoken *nstoken; 413 + int cgroup_fd; 414 + 415 + cgroup_fd = test__join_cgroup("/mptcp_subflow"); 416 + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_subflow")) 417 + return; 418 + 419 + skel = mptcp_subflow__open_and_load(); 420 + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_subflow")) 421 + goto close_cgroup; 422 + 423 + skel->bss->pid = getpid(); 424 + 425 + skel->links.mptcp_subflow = 426 + bpf_program__attach_cgroup(skel->progs.mptcp_subflow, cgroup_fd); 427 + if (!ASSERT_OK_PTR(skel->links.mptcp_subflow, "attach mptcp_subflow")) 428 + goto skel_destroy; 429 + 430 + skel->links._getsockopt_subflow = 431 + bpf_program__attach_cgroup(skel->progs._getsockopt_subflow, cgroup_fd); 432 + if (!ASSERT_OK_PTR(skel->links._getsockopt_subflow, "attach _getsockopt_subflow")) 433 + goto skel_destroy; 434 + 435 + nstoken = create_netns(); 436 + if (!ASSERT_OK_PTR(nstoken, "create_netns: mptcp_subflow")) 437 + goto skel_destroy; 438 + 439 + if (endpoint_init("subflow") < 0) 440 + goto close_netns; 441 + 442 + run_subflow(); 443 + 444 + close_netns: 445 + cleanup_netns(nstoken); 446 + skel_destroy: 447 + mptcp_subflow__destroy(skel); 448 + close_cgroup: 449 + close(cgroup_fd); 450 + } 451 + 343 452 void test_mptcp(void) 344 
453 { 345 454 if (test__start_subtest("base")) 346 455 test_base(); 347 456 if (test__start_subtest("mptcpify")) 348 457 test_mptcpify(); 458 + if (test__start_subtest("subflow")) 459 + test_subflow(); 349 460 }

+22 -7

tools/testing/selftests/bpf/prog_tests/netns_cookie.c

··· 8 8 #define SO_NETNS_COOKIE 71 9 9 #endif 10 10 11 + #define loopback 1 12 + 11 13 static int duration; 12 14 13 15 void test_netns_cookie(void) 14 16 { 17 + LIBBPF_OPTS(bpf_prog_attach_opts, opta); 18 + LIBBPF_OPTS(bpf_prog_detach_opts, optd); 15 19 int server_fd = -1, client_fd = -1, cgroup_fd = -1; 16 - int err, val, ret, map, verdict; 20 + int err, val, ret, map, verdict, tc_fd; 17 21 struct netns_cookie_prog *skel; 18 22 uint64_t cookie_expected_value; 19 23 socklen_t vallen = sizeof(cookie_expected_value); ··· 42 38 if (!ASSERT_OK(err, "prog_attach")) 43 39 goto done; 44 40 41 + tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx); 42 + err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta); 43 + if (!ASSERT_OK(err, "prog_attach")) 44 + goto done; 45 + 45 46 server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); 46 47 if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno)) 47 - goto done; 48 + goto cleanup_tc; 48 49 49 50 client_fd = connect_to_fd(server_fd, 0); 50 51 if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno)) 51 - goto done; 52 + goto cleanup_tc; 52 53 53 54 ret = send(client_fd, send_msg, sizeof(send_msg), 0); 54 55 if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret)) 55 - goto done; 56 + goto cleanup_tc; 56 57 57 58 err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies), 58 59 &client_fd, &val); 59 60 if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)")) 60 - goto done; 61 + goto cleanup_tc; 61 62 62 63 err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE, 63 64 &cookie_expected_value, &vallen); 64 65 if (!ASSERT_OK(err, "getsockopt")) 65 - goto done; 66 + goto cleanup_tc; 66 67 67 68 ASSERT_EQ(val, cookie_expected_value, "cookie_value"); 68 69 69 70 err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies), 70 71 &client_fd, &val); 71 72 if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)")) 72 - goto done; 73 + goto cleanup_tc; 73 74 74 75 
ASSERT_EQ(val, cookie_expected_value, "cookie_value"); 76 + ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value"); 77 + ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value"); 78 + 79 + cleanup_tc: 80 + err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd); 81 + ASSERT_OK(err, "prog_detach"); 75 82 76 83 done: 77 84 if (server_fd != -1)

+85 -9

tools/testing/selftests/bpf/prog_tests/tc_netkit.c

··· 14 14 #include "netlink_helpers.h" 15 15 #include "tc_helpers.h" 16 16 17 - #define ICMP_ECHO 8 17 + #define MARK 42 18 + #define PRIO 0xeb9f 19 + #define ICMP_ECHO 8 18 20 19 21 struct icmphdr { 20 22 __u8 type; ··· 35 33 }; 36 34 37 35 static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, 38 - bool same_netns) 36 + bool same_netns, int scrub, int peer_scrub) 39 37 { 40 38 struct rtnl_handle rth = { .fd = -1 }; 41 39 struct iplink_req req = {}; ··· 60 58 data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); 61 59 addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy); 62 60 addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy); 61 + addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub); 62 + addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub); 63 63 addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); 64 64 addattr_nest_end(&req.n, data); 65 65 addattr_nest_end(&req.n, linkinfo); ··· 122 118 123 119 static int __send_icmp(__u32 dest) 124 120 { 121 + int sock, ret, mark = MARK, prio = PRIO; 125 122 struct sockaddr_in addr; 126 123 struct icmphdr icmp; 127 - int sock, ret; 128 124 129 125 ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); 130 126 if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)")) ··· 137 133 ret = setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, 138 134 netkit_name, strlen(netkit_name) + 1); 139 135 if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)")) 136 + goto out; 137 + 138 + ret = setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); 139 + if (!ASSERT_OK(ret, "setsockopt(SO_MARK)")) 140 + goto out; 141 + 142 + ret = setsockopt(sock, SOL_SOCKET, SO_PRIORITY, 143 + &prio, sizeof(prio)); 144 + if (!ASSERT_OK(ret, "setsockopt(SO_PRIORITY)")) 140 145 goto out; 141 146 142 147 memset(&addr, 0, sizeof(addr)); ··· 184 171 int err, ifindex; 185 172 186 173 err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, 187 - &ifindex, false); 174 + &ifindex, 
false, NETKIT_SCRUB_DEFAULT, 175 + NETKIT_SCRUB_DEFAULT); 188 176 if (err) 189 177 return; 190 178 ··· 299 285 int err, ifindex; 300 286 301 287 err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, 302 - &ifindex, false); 288 + &ifindex, false, NETKIT_SCRUB_DEFAULT, 289 + NETKIT_SCRUB_DEFAULT); 303 290 if (err) 304 291 return; 305 292 ··· 428 413 int err, ifindex; 429 414 430 415 err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, 431 - &ifindex, false); 416 + &ifindex, false, NETKIT_SCRUB_DEFAULT, 417 + NETKIT_SCRUB_DEFAULT); 432 418 if (err) 433 419 return; 434 420 ··· 543 527 int err, ifindex, ifindex2; 544 528 545 529 err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS, 546 - &ifindex, true); 530 + &ifindex, true, NETKIT_SCRUB_DEFAULT, 531 + NETKIT_SCRUB_DEFAULT); 547 532 if (err) 548 533 return; 549 534 ··· 655 638 int err, ifindex; 656 639 657 640 err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, 658 - &ifindex, false); 641 + &ifindex, false, NETKIT_SCRUB_DEFAULT, 642 + NETKIT_SCRUB_DEFAULT); 659 643 if (err) 660 644 return; 661 645 ··· 733 715 struct bpf_link *link; 734 716 735 717 err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, 736 - &ifindex, true); 718 + &ifindex, true, NETKIT_SCRUB_DEFAULT, 719 + NETKIT_SCRUB_DEFAULT); 737 720 if (err) 738 721 return; 739 722 ··· 797 778 { 798 779 serial_test_tc_netkit_pkt_type_mode(NETKIT_L2); 799 780 serial_test_tc_netkit_pkt_type_mode(NETKIT_L3); 781 + } 782 + 783 + static void serial_test_tc_netkit_scrub_type(int scrub) 784 + { 785 + LIBBPF_OPTS(bpf_netkit_opts, optl); 786 + struct test_tc_link *skel; 787 + struct bpf_link *link; 788 + int err, ifindex; 789 + 790 + err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, 791 + &ifindex, false, scrub, scrub); 792 + if (err) 793 + return; 794 + 795 + skel = test_tc_link__open(); 796 + if (!ASSERT_OK_PTR(skel, "skel_open")) 797 + goto cleanup; 798 + 799 + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc8, 800 + BPF_NETKIT_PRIMARY), 0, 
"tc8_attach_type"); 801 + 802 + err = test_tc_link__load(skel); 803 + if (!ASSERT_OK(err, "skel_load")) 804 + goto cleanup; 805 + 806 + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); 807 + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); 808 + 809 + ASSERT_EQ(skel->bss->seen_tc8, false, "seen_tc8"); 810 + 811 + link = bpf_program__attach_netkit(skel->progs.tc8, ifindex, &optl); 812 + if (!ASSERT_OK_PTR(link, "link_attach")) 813 + goto cleanup; 814 + 815 + skel->links.tc8 = link; 816 + 817 + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); 818 + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); 819 + 820 + tc_skel_reset_all_seen(skel); 821 + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); 822 + 823 + ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8"); 824 + ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark"); 825 + ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio"); 826 + cleanup: 827 + test_tc_link__destroy(skel); 828 + 829 + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); 830 + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); 831 + destroy_netkit(); 832 + } 833 + 834 + void serial_test_tc_netkit_scrub(void) 835 + { 836 + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT); 837 + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE); 800 838 }

+36 -8

tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c

··· 2 2 #include <uapi/linux/bpf.h> 3 3 #include <linux/if_link.h> 4 4 #include <test_progs.h> 5 + #include <network_helpers.h> 5 6 6 7 #include "test_xdp_with_cpumap_frags_helpers.skel.h" 7 8 #include "test_xdp_with_cpumap_helpers.skel.h" 8 9 9 10 #define IFINDEX_LO 1 11 + #define TEST_NS "cpu_attach_ns" 10 12 11 13 static void test_xdp_with_cpumap_helpers(void) 12 14 { 13 - struct test_xdp_with_cpumap_helpers *skel; 15 + struct test_xdp_with_cpumap_helpers *skel = NULL; 14 16 struct bpf_prog_info info = {}; 15 17 __u32 len = sizeof(info); 16 18 struct bpf_cpumap_val val = { 17 19 .qsize = 192, 18 20 }; 19 - int err, prog_fd, map_fd; 21 + int err, prog_fd, prog_redir_fd, map_fd; 22 + struct nstoken *nstoken = NULL; 20 23 __u32 idx = 0; 24 + 25 + SYS(out_close, "ip netns add %s", TEST_NS); 26 + nstoken = open_netns(TEST_NS); 27 + if (!ASSERT_OK_PTR(nstoken, "open_netns")) 28 + goto out_close; 29 + SYS(out_close, "ip link set dev lo up"); 21 30 22 31 skel = test_xdp_with_cpumap_helpers__open_and_load(); 23 32 if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load")) 24 33 return; 25 34 26 - prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); 27 - err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); 35 + prog_redir_fd = bpf_program__fd(skel->progs.xdp_redir_prog); 36 + err = bpf_xdp_attach(IFINDEX_LO, prog_redir_fd, XDP_FLAGS_SKB_MODE, NULL); 28 37 if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP")) 29 38 goto out_close; 30 - 31 - err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); 32 - ASSERT_OK(err, "XDP program detach"); 33 39 34 40 prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); 35 41 map_fd = bpf_map__fd(skel->maps.cpu_map); ··· 50 44 err = bpf_map_lookup_elem(map_fd, &idx, &val); 51 45 ASSERT_OK(err, "Read cpumap entry"); 52 46 ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); 47 + 48 + /* send a packet to trigger any potential bugs in there */ 49 + char data[10] = 
{}; 50 + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, 51 + .data_in = &data, 52 + .data_size_in = 10, 53 + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, 54 + .repeat = 1, 55 + ); 56 + err = bpf_prog_test_run_opts(prog_redir_fd, &opts); 57 + ASSERT_OK(err, "XDP test run"); 58 + 59 + /* wait for the packets to be flushed, then check that redirect has been 60 + * performed 61 + */ 62 + kern_sync_rcu(); 63 + ASSERT_NEQ(skel->bss->redirect_count, 0, "redirected packets"); 64 + 65 + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); 66 + ASSERT_OK(err, "XDP program detach"); 53 67 54 68 /* can not attach BPF_XDP_CPUMAP program to a device */ 55 69 err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); ··· 91 65 ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry"); 92 66 93 67 out_close: 68 + close_netns(nstoken); 69 + SYS_NOFAIL("ip netns del %s", TEST_NS); 94 70 test_xdp_with_cpumap_helpers__destroy(skel); 95 71 } 96 72 ··· 139 111 test_xdp_with_cpumap_frags_helpers__destroy(skel); 140 112 } 141 113 142 - void serial_test_xdp_cpumap_attach(void) 114 + void test_xdp_cpumap_attach(void) 143 115 { 144 116 if (test__start_subtest("CPUMAP with programs in entries")) 145 117 test_xdp_with_cpumap_helpers();

+42

tools/testing/selftests/bpf/progs/mptcp_bpf.h

··· 1 + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 + #ifndef __MPTCP_BPF_H__ 3 + #define __MPTCP_BPF_H__ 4 + 5 + #include "bpf_experimental.h" 6 + 7 + /* list helpers from include/linux/list.h */ 8 + static inline int list_is_head(const struct list_head *list, 9 + const struct list_head *head) 10 + { 11 + return list == head; 12 + } 13 + 14 + #define list_entry(ptr, type, member) \ 15 + container_of(ptr, type, member) 16 + 17 + #define list_first_entry(ptr, type, member) \ 18 + list_entry((ptr)->next, type, member) 19 + 20 + #define list_next_entry(pos, member) \ 21 + list_entry((pos)->member.next, typeof(*(pos)), member) 22 + 23 + #define list_entry_is_head(pos, head, member) \ 24 + list_is_head(&pos->member, (head)) 25 + 26 + /* small difference: 'can_loop' has been added in the conditions */ 27 + #define list_for_each_entry(pos, head, member) \ 28 + for (pos = list_first_entry(head, typeof(*pos), member); \ 29 + !list_entry_is_head(pos, head, member) && can_loop; \ 30 + pos = list_next_entry(pos, member)) 31 + 32 + /* mptcp helpers from protocol.h */ 33 + #define mptcp_for_each_subflow(__msk, __subflow) \ 34 + list_for_each_entry(__subflow, &((__msk)->conn_list), node) 35 + 36 + static __always_inline struct sock * 37 + mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) 38 + { 39 + return subflow->tcp_sock; 40 + } 41 + 42 + #endif

+128

tools/testing/selftests/bpf/progs/mptcp_subflow.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2020, Tessares SA. */ 3 + /* Copyright (c) 2024, Kylin Software */ 4 + 5 + /* vmlinux.h, bpf_helpers.h and other 'define' */ 6 + #include "bpf_tracing_net.h" 7 + #include "mptcp_bpf.h" 8 + 9 + char _license[] SEC("license") = "GPL"; 10 + 11 + char cc[TCP_CA_NAME_MAX] = "reno"; 12 + int pid; 13 + 14 + /* Associate a subflow counter to each token */ 15 + struct { 16 + __uint(type, BPF_MAP_TYPE_HASH); 17 + __uint(key_size, sizeof(__u32)); 18 + __uint(value_size, sizeof(__u32)); 19 + __uint(max_entries, 100); 20 + } mptcp_sf SEC(".maps"); 21 + 22 + SEC("sockops") 23 + int mptcp_subflow(struct bpf_sock_ops *skops) 24 + { 25 + __u32 init = 1, key, mark, *cnt; 26 + struct mptcp_sock *msk; 27 + struct bpf_sock *sk; 28 + int err; 29 + 30 + if (skops->op != BPF_SOCK_OPS_TCP_CONNECT_CB) 31 + return 1; 32 + 33 + sk = skops->sk; 34 + if (!sk) 35 + return 1; 36 + 37 + msk = bpf_skc_to_mptcp_sock(sk); 38 + if (!msk) 39 + return 1; 40 + 41 + key = msk->token; 42 + cnt = bpf_map_lookup_elem(&mptcp_sf, &key); 43 + if (cnt) { 44 + /* A new subflow is added to an existing MPTCP connection */ 45 + __sync_fetch_and_add(cnt, 1); 46 + mark = *cnt; 47 + } else { 48 + /* A new MPTCP connection is just initiated and this is its primary subflow */ 49 + bpf_map_update_elem(&mptcp_sf, &key, &init, BPF_ANY); 50 + mark = init; 51 + } 52 + 53 + /* Set the mark of the subflow's socket based on appearance order */ 54 + err = bpf_setsockopt(skops, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); 55 + if (err < 0) 56 + return 1; 57 + if (mark == 2) 58 + err = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, TCP_CA_NAME_MAX); 59 + 60 + return 1; 61 + } 62 + 63 + static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx) 64 + { 65 + struct mptcp_subflow_context *subflow; 66 + int i = 0; 67 + 68 + mptcp_for_each_subflow(msk, subflow) { 69 + struct sock *ssk; 70 + 71 + ssk = 
mptcp_subflow_tcp_sock(bpf_core_cast(subflow, 72 + struct mptcp_subflow_context)); 73 + 74 + if (ssk->sk_mark != ++i) { 75 + ctx->retval = -2; 76 + break; 77 + } 78 + } 79 + 80 + return 1; 81 + } 82 + 83 + static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx) 84 + { 85 + struct mptcp_subflow_context *subflow; 86 + 87 + mptcp_for_each_subflow(msk, subflow) { 88 + struct inet_connection_sock *icsk; 89 + struct sock *ssk; 90 + 91 + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, 92 + struct mptcp_subflow_context)); 93 + icsk = bpf_core_cast(ssk, struct inet_connection_sock); 94 + 95 + if (ssk->sk_mark == 2 && 96 + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) { 97 + ctx->retval = -2; 98 + break; 99 + } 100 + } 101 + 102 + return 1; 103 + } 104 + 105 + SEC("cgroup/getsockopt") 106 + int _getsockopt_subflow(struct bpf_sockopt *ctx) 107 + { 108 + struct bpf_sock *sk = ctx->sk; 109 + struct mptcp_sock *msk; 110 + 111 + if (bpf_get_current_pid_tgid() >> 32 != pid) 112 + return 1; 113 + 114 + if (!sk || sk->protocol != IPPROTO_MPTCP || 115 + (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) && 116 + !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION))) 117 + return 1; 118 + 119 + msk = bpf_core_cast(sk, struct mptcp_sock); 120 + if (msk->pm.subflows != 1) { 121 + ctx->retval = -1; 122 + return 1; 123 + } 124 + 125 + if (ctx->optname == SO_MARK) 126 + return _check_getsockopt_subflow_mark(msk, ctx); 127 + return _check_getsockopt_subflow_cc(msk, ctx); 128 + }

+10

tools/testing/selftests/bpf/progs/netns_cookie_prog.c

··· 27 27 __type(value, __u64); 28 28 } sock_map SEC(".maps"); 29 29 30 + int tcx_init_netns_cookie, tcx_netns_cookie; 31 + 30 32 SEC("sockops") 31 33 int get_netns_cookie_sockops(struct bpf_sock_ops *ctx) 32 34 { ··· 81 79 *cookie = bpf_get_netns_cookie(msg); 82 80 83 81 return 1; 82 + } 83 + 84 + SEC("tcx/ingress") 85 + int get_netns_cookie_tcx(struct __sk_buff *skb) 86 + { 87 + tcx_init_netns_cookie = bpf_get_netns_cookie(NULL); 88 + tcx_netns_cookie = bpf_get_netns_cookie(skb); 89 + return TCX_PASS; 84 90 } 85 91 86 92 char _license[] SEC("license") = "GPL";

+12

tools/testing/selftests/bpf/progs/test_tc_link.c

··· 18 18 bool seen_tc5; 19 19 bool seen_tc6; 20 20 bool seen_tc7; 21 + bool seen_tc8; 21 22 22 23 bool set_type; 23 24 24 25 bool seen_eth; 25 26 bool seen_host; 26 27 bool seen_mcast; 28 + 29 + int mark, prio; 27 30 28 31 SEC("tc/ingress") 29 32 int tc1(struct __sk_buff *skb) ··· 101 98 } 102 99 out: 103 100 seen_tc7 = true; 101 + return TCX_PASS; 102 + } 103 + 104 + SEC("tc/egress") 105 + int tc8(struct __sk_buff *skb) 106 + { 107 + seen_tc8 = true; 108 + mark = skb->mark; 109 + prio = skb->priority; 104 110 return TCX_PASS; 105 111 }

+6 -1

tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c

··· 12 12 __uint(max_entries, 4); 13 13 } cpu_map SEC(".maps"); 14 14 15 + __u32 redirect_count = 0; 16 + 15 17 SEC("xdp") 16 18 int xdp_redir_prog(struct xdp_md *ctx) 17 19 { 18 - return bpf_redirect_map(&cpu_map, 1, 0); 20 + return bpf_redirect_map(&cpu_map, 0, 0); 19 21 } 20 22 21 23 SEC("xdp") ··· 29 27 SEC("xdp/cpumap") 30 28 int xdp_dummy_cm(struct xdp_md *ctx) 31 29 { 30 + if (bpf_get_smp_processor_id() == 0) 31 + redirect_count++; 32 + 32 33 if (ctx->ingress_ifindex == IFINDEX_LO) 33 34 return XDP_DROP; 34 35

Configure Feed

Configure Feed