Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

- Fix sk_local_storage diag dump via netlink (Amery Hung)

- Fix off-by-one in arena direct-value access (Junyoung Jang)

- Reject TCP_NODELAY in bpf-tcp congestion control (KaFai Wan)

- Fix type confusion in bpf_*_sock() (Kuniyuki Iwashima)

- Reject TX-only AF_XDP sockets (Linpu Yu)

- Don't run arg-tracking analysis twice on main subprog (Paul Chaignon)

- Fix NULL pointer dereference in bpf_sk_storage_clone and fib lookup (Weiming Shi)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
bpf: Fix off-by-one boundary validation in arena direct-value access
xskmap: reject TX-only AF_XDP sockets
bpf: Don't run arg-tracking analysis twice on main subprog
bpf: Free reuseport cBPF prog after RCU grace period.
bpf: tcp: Fix type confusion in sol_tcp_sockopt().
bpf: tcp: Fix type confusion in bpf_skc_to_tcp6_sock().
bpf: tcp: Fix type confusion in bpf_skc_to_tcp_sock().
mptcp: bpf: Fix type confusion in bpf_mptcp_sock_from_subflow()
selftest: bpf: Add test for bpf_tcp_sock() and RAW socket.
bpf: tcp: Fix type confusion in bpf_tcp_sock().
tools/headers: Regenerate stddef.h to fix BPF selftests
bpf: Fix sk_local_storage diag dumping uninitialized special fields
bpf: Fix NULL pointer dereference in bpf_skb_fib_lookup()
sockmap: Fix sk_psock_drop() race vs sock_map_{unhash,close,destroy}().
bpf: Fix NULL pointer dereference in bpf_sk_storage_clone and diag paths
selftests/bpf: Verify bpf-tcp-cc rejects TCP_NODELAY
selftests/bpf: Test TCP_NODELAY in TCP hdr opt callbacks
bpf: Reject TCP_NODELAY in bpf-tcp-cc
bpf: Reject TCP_NODELAY in TCP header option callbacks

Linus Torvalds 1 month ago 515186b7 1bfaee9d

+189 -51

16 changed files

expand all collapse all

include

linux

bpf.h

kernel

bpf

arena.c

liveness.c

net

core

bpf_sk_storage.c

filter.c

sock_map.c

ipv4

bpf_tcp_ca.c

mptcp

bpf.c

xdp

xskmap.c

tools

include

uapi

linux

stddef.h

testing

selftests

bpf

prog_tests

bpf_tcp_ca.c

sockopt_sk.c

tcp_hdr_options.c

progs

bpf_cubic.c

sockopt_sk.c

test_misc_tcp_hdr_options.c

include/linux/bpf.h

reviewed

··· 3725 3725 extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; 3726 3726 extern const struct bpf_func_proto bpf_sk_setsockopt_proto; 3727 3727 extern const struct bpf_func_proto bpf_sk_getsockopt_proto; 3728 3728 + extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto; 3728 3729 extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; 3729 3730 extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; 3730 3731 extern const struct bpf_func_proto bpf_find_vma_proto;

+1 -1

kernel/bpf/arena.c

reviewed

··· 511 511 { 512 512 struct bpf_arena *arena = container_of(map, struct bpf_arena, map); 513 513 514 514 - if ((u64)off > arena->user_vm_end - arena->user_vm_start) 514 514 + if ((u64)off >= arena->user_vm_end - arena->user_vm_start) 515 515 return -ERANGE; 516 516 *imm = (unsigned long)arena->user_vm_start; 517 517 return 0;

+7 -18

kernel/bpf/liveness.c

reviewed

··· 1914 1914 return -ENOMEM; 1915 1915 } 1916 1916 1917 1917 - instance = call_instance(env, NULL, 0, 0); 1918 1918 - if (IS_ERR(instance)) { 1919 1919 - err = PTR_ERR(instance); 1920 1920 - goto out; 1921 1921 - } 1922 1922 - err = analyze_subprog(env, NULL, info, instance, callsites); 1923 1923 - if (err) 1924 1924 - goto out; 1925 1925 - 1926 1917 /* 1927 1927 - * Subprogs and callbacks that don't receive FP-derived arguments 1928 1928 - * cannot access ancestor stack frames, so they were skipped during 1929 1929 - * the recursive walk above. Async callbacks (timer, workqueue) are 1930 1930 - * also not reachable from the main program's call graph. Analyze 1931 1931 - * all unvisited subprogs as independent roots at depth 0. 1918 1918 + * Analyze every subprog in reverse topological order (callers 1919 1919 + * before callees) so that each subprog is analyzed before its 1920 1920 + * callees, allowing the recursive walk inside analyze_subprog() 1921 1921 + * to naturally reach callees that receive FP-derived args. 1932 1922 * 1933 1933 - * Use reverse topological order (callers before callees) so that 1934 1934 - * each subprog is analyzed before its callees, allowing the 1935 1935 - * recursive walk inside analyze_subprog() to naturally 1936 1936 - * reach nested callees that also lack FP-derived args. 1923 1923 + * Subprogs and callbacks that don't receive FP-derived arguments 1924 1924 + * cannot access ancestor stack frames are analyzed independently. 1925 1925 + * Async callbacks (timer, workqueue) are handled the same way. 1937 1926 */ 1938 1927 for (k = env->subprog_cnt - 1; k >= 0; k--) { 1939 1928 int sub = env->subprog_topo_order[k];

+8 -6

net/core/bpf_sk_storage.c

reviewed

··· 172 172 struct bpf_map *map; 173 173 174 174 smap = rcu_dereference(SDATA(selem)->smap); 175 175 - if (!(smap->map.map_flags & BPF_F_CLONE)) 175 175 + if (!smap || !(smap->map.map_flags & BPF_F_CLONE)) 176 176 continue; 177 177 178 178 /* Note that for lockless listeners adding new element ··· 531 531 } 532 532 EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); 533 533 534 534 - static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) 534 534 + static int diag_get(struct bpf_local_storage_map *smap, 535 535 + struct bpf_local_storage_data *sdata, struct sk_buff *skb) 535 536 { 536 537 struct nlattr *nla_stg, *nla_value; 537 537 - struct bpf_local_storage_map *smap; 538 538 539 539 /* It cannot exceed max nlattr's payload */ 540 540 BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE); ··· 543 543 if (!nla_stg) 544 544 return -EMSGSIZE; 545 545 546 546 - smap = rcu_dereference(sdata->smap); 547 546 if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) 548 547 goto errout; 549 548 ··· 557 558 sdata->data, true); 558 559 else 559 560 copy_map_value(&smap->map, nla_data(nla_value), sdata->data); 561 561 + check_and_init_map_value(&smap->map, nla_data(nla_value)); 560 562 561 563 nla_nest_end(skb, nla_stg); 562 564 return 0; ··· 596 596 saved_len = skb->len; 597 597 hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { 598 598 smap = rcu_dereference(SDATA(selem)->smap); 599 599 + if (!smap) 600 600 + continue; 599 601 diag_size += nla_value_size(smap->map.value_size); 600 602 601 601 - if (nla_stgs && diag_get(SDATA(selem), skb)) 603 603 + if (nla_stgs && diag_get(smap, SDATA(selem), skb)) 602 604 /* Continue to learn diag_size */ 603 605 err = -EMSGSIZE; 604 606 } ··· 667 665 668 666 diag_size += nla_value_size(diag->maps[i]->value_size); 669 667 670 670 - if (nla_stgs && diag_get(sdata, skb)) 668 668 + if (nla_stgs && diag_get((struct bpf_local_storage_map *)diag->maps[i], sdata, skb)) 671 669 /* Continue to learn 
diag_size */ 672 670 err = -EMSGSIZE; 673 671 }

+48 -7

net/core/filter.c

reviewed

··· 1654 1654 return err; 1655 1655 } 1656 1656 1657 1657 + static void sk_reuseport_prog_free_rcu(struct rcu_head *rcu) 1658 1658 + { 1659 1659 + struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 1660 1660 + struct bpf_prog *prog = aux->prog; 1661 1661 + 1662 1662 + bpf_release_orig_filter(prog); 1663 1663 + bpf_prog_free(prog); 1664 1664 + } 1665 1665 + 1657 1666 void sk_reuseport_prog_free(struct bpf_prog *prog) 1658 1667 { 1659 1668 if (!prog) 1660 1669 return; 1661 1670 1662 1662 - if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) 1663 1663 - bpf_prog_put(prog); 1671 1671 + if (bpf_prog_was_classic(prog)) 1672 1672 + call_rcu(&prog->aux->rcu, sk_reuseport_prog_free_rcu); 1664 1673 else 1665 1665 - bpf_prog_destroy(prog); 1674 1674 + bpf_prog_put(prog); 1666 1675 } 1667 1676 1668 1677 static inline int __bpf_try_make_writable(struct sk_buff *skb, ··· 5490 5481 char *optval, int *optlen, 5491 5482 bool getopt) 5492 5483 { 5493 5493 - if (sk->sk_protocol != IPPROTO_TCP) 5484 5484 + if (!sk_is_tcp(sk)) 5494 5485 return -EINVAL; 5495 5486 5496 5487 switch (optname) { ··· 5697 5688 .arg5_type = ARG_CONST_SIZE, 5698 5689 }; 5699 5690 5691 5691 + BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level, 5692 5692 + int, optname, char *, optval, int, optlen) 5693 5693 + { 5694 5694 + /* 5695 5695 + * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters 5696 5696 + * CA_EVENT_TX_START in bpf_tcp_cc. 
5697 5697 + */ 5698 5698 + if (level == SOL_TCP && optname == TCP_NODELAY) 5699 5699 + return -EOPNOTSUPP; 5700 5700 + 5701 5701 + return _bpf_setsockopt(sk, level, optname, optval, optlen); 5702 5702 + } 5703 5703 + 5704 5704 + const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = { 5705 5705 + .func = bpf_sk_setsockopt_nodelay, 5706 5706 + .gpl_only = false, 5707 5707 + .ret_type = RET_INTEGER, 5708 5708 + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 5709 5709 + .arg2_type = ARG_ANYTHING, 5710 5710 + .arg3_type = ARG_ANYTHING, 5711 5711 + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 5712 5712 + .arg5_type = ARG_CONST_SIZE, 5713 5713 + }; 5714 5714 + 5700 5715 BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, 5701 5716 int, optname, char *, optval, int, optlen) 5702 5717 { ··· 5864 5831 int, level, int, optname, char *, optval, int, optlen) 5865 5832 { 5866 5833 if (!is_locked_tcp_sock_ops(bpf_sock)) 5834 5834 + return -EOPNOTSUPP; 5835 5835 + 5836 5836 + /* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. 
*/ 5837 5837 + if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB || 5838 5838 + bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) && 5839 5839 + level == SOL_TCP && optname == TCP_NODELAY) 5867 5840 return -EOPNOTSUPP; 5868 5841 5869 5842 return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen); ··· 6482 6443 * against MTU of FIB lookup resulting net_device 6483 6444 */ 6484 6445 dev = dev_get_by_index_rcu(net, params->ifindex); 6446 6446 + if (unlikely(!dev)) 6447 6447 + return -ENODEV; 6485 6448 if (!is_skb_forwardable(dev, skb)) 6486 6449 rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; 6487 6450 ··· 7484 7443 7485 7444 BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) 7486 7445 { 7487 7487 - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) 7446 7446 + if (sk_fullsock(sk) && sk_is_tcp(sk)) 7488 7447 return (unsigned long)sk; 7489 7448 7490 7449 return (unsigned long)NULL; ··· 11956 11915 */ 11957 11916 BTF_TYPE_EMIT(struct tcp6_sock); 11958 11917 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && 11959 11959 - sk->sk_family == AF_INET6) 11918 11918 + sk->sk_type == SOCK_STREAM && sk->sk_family == AF_INET6) 11960 11919 return (unsigned long)sk; 11961 11920 11962 11921 return (unsigned long)NULL; ··· 11972 11931 11973 11932 BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) 11974 11933 { 11975 11975 - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) 11934 11934 + if (sk && sk_fullsock(sk) && sk_is_tcp(sk)) 11976 11935 return (unsigned long)sk; 11977 11936 11978 11937 return (unsigned long)NULL;

+25 -14

net/core/sock_map.c

reviewed

··· 1630 1630 void (*saved_unhash)(struct sock *sk); 1631 1631 struct sk_psock *psock; 1632 1632 1633 1633 + retry: 1633 1634 rcu_read_lock(); 1634 1635 psock = sk_psock(sk); 1635 1636 if (unlikely(!psock)) { 1636 1637 rcu_read_unlock(); 1637 1638 saved_unhash = READ_ONCE(sk->sk_prot)->unhash; 1639 1639 + if (unlikely(saved_unhash == sock_map_unhash)) 1640 1640 + goto retry; 1638 1641 } else { 1639 1642 saved_unhash = psock->saved_unhash; 1640 1643 sock_map_remove_links(sk, psock); 1641 1644 rcu_read_unlock(); 1645 1645 + 1646 1646 + if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) 1647 1647 + return; 1642 1648 } 1643 1643 - if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) 1644 1644 - return; 1649 1649 + 1645 1650 if (saved_unhash) 1646 1651 saved_unhash(sk); 1647 1652 } ··· 1657 1652 void (*saved_destroy)(struct sock *sk); 1658 1653 struct sk_psock *psock; 1659 1654 1655 1655 + retry: 1660 1656 rcu_read_lock(); 1661 1657 psock = sk_psock_get(sk); 1662 1658 if (unlikely(!psock)) { 1663 1659 rcu_read_unlock(); 1664 1660 saved_destroy = READ_ONCE(sk->sk_prot)->destroy; 1661 1661 + if (unlikely(saved_destroy == sock_map_destroy)) 1662 1662 + goto retry; 1665 1663 } else { 1666 1664 saved_destroy = psock->saved_destroy; 1667 1665 sock_map_remove_links(sk, psock); 1668 1666 rcu_read_unlock(); 1669 1667 sk_psock_stop(psock); 1670 1668 sk_psock_put(sk, psock); 1669 1669 + 1670 1670 + if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) 1671 1671 + return; 1671 1672 } 1672 1672 - if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) 1673 1673 - return; 1673 1673 + 1674 1674 if (saved_destroy) 1675 1675 saved_destroy(sk); 1676 1676 } ··· 1686 1676 void (*saved_close)(struct sock *sk, long timeout); 1687 1677 struct sk_psock *psock; 1688 1678 1679 1679 + retry: 1689 1680 lock_sock(sk); 1690 1681 rcu_read_lock(); 1691 1691 - psock = sk_psock(sk); 1682 1682 + psock = sk_psock_get(sk); 1692 1683 if (likely(psock)) { 1693 1684 saved_close = psock->saved_close; 1694 1685 
sock_map_remove_links(sk, psock); 1695 1695 - psock = sk_psock_get(sk); 1696 1696 - if (unlikely(!psock)) 1697 1697 - goto no_psock; 1698 1686 rcu_read_unlock(); 1699 1687 sk_psock_stop(psock); 1700 1688 release_sock(sk); 1701 1689 cancel_delayed_work_sync(&psock->work); 1702 1690 sk_psock_put(sk, psock); 1691 1691 + 1692 1692 + /* Make sure we do not recurse. This is a bug. 1693 1693 + * Leak the socket instead of crashing on a stack overflow. 1694 1694 + */ 1695 1695 + if (WARN_ON_ONCE(saved_close == sock_map_close)) 1696 1696 + return; 1703 1697 } else { 1704 1698 saved_close = READ_ONCE(sk->sk_prot)->close; 1705 1705 - no_psock: 1706 1699 rcu_read_unlock(); 1707 1700 release_sock(sk); 1701 1701 + 1702 1702 + if (unlikely(saved_close == sock_map_close)) 1703 1703 + goto retry; 1708 1704 } 1709 1705 1710 1710 - /* Make sure we do not recurse. This is a bug. 1711 1711 - * Leak the socket instead of crashing on a stack overflow. 1712 1712 - */ 1713 1713 - if (WARN_ON_ONCE(saved_close == sock_map_close)) 1714 1714 - return; 1715 1706 saved_close(sk, timeout); 1716 1707 } 1717 1708 EXPORT_SYMBOL_GPL(sock_map_close);

+1 -1

net/ipv4/bpf_tcp_ca.c

reviewed

··· 168 168 */ 169 169 if (prog_ops_moff(prog) != 170 170 offsetof(struct tcp_congestion_ops, release)) 171 171 - return &bpf_sk_setsockopt_proto; 171 171 + return &bpf_sk_setsockopt_nodelay_proto; 172 172 return NULL; 173 173 case BPF_FUNC_getsockopt: 174 174 /* Since get/setsockopt is usually expected to

+1 -1

net/mptcp/bpf.c

reviewed

··· 14 14 15 15 struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) 16 16 { 17 17 - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) 17 17 + if (sk && sk_fullsock(sk) && sk_is_tcp(sk) && sk_is_mptcp(sk)) 18 18 return mptcp_sk(mptcp_subflow_ctx(sk)->conn); 19 19 20 20 return NULL;

net/xdp/xskmap.c

reviewed

··· 184 184 } 185 185 186 186 xs = (struct xdp_sock *)sock->sk; 187 187 + if (!READ_ONCE(xs->rx)) { 188 188 + sockfd_put(sock); 189 189 + return -ENOBUFS; 190 190 + } 187 191 188 192 map_entry = &m->xsk_map[i]; 189 193 node = xsk_map_node_alloc(m, map_entry);

+25 -1

tools/include/uapi/linux/stddef.h

reviewed

··· 3 3 #define _LINUX_STDDEF_H 4 4 5 5 6 6 - 7 6 #ifndef __always_inline 8 7 #define __always_inline __inline__ 9 8 #endif ··· 35 36 struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ 36 37 } ATTRS 37 38 39 39 + #ifdef __cplusplus 40 40 + /* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ 41 41 + #define __DECLARE_FLEX_ARRAY(T, member) \ 42 42 + T member[0] 43 43 + #else 38 44 /** 39 45 * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union 40 46 * ··· 56 52 TYPE NAME[]; \ 57 53 } 58 54 #endif 55 55 + 56 56 + #ifndef __counted_by 57 57 + #define __counted_by(m) 58 58 + #endif 59 59 + 60 60 + #ifndef __counted_by_le 61 61 + #define __counted_by_le(m) 62 62 + #endif 63 63 + 64 64 + #ifndef __counted_by_be 65 65 + #define __counted_by_be(m) 66 66 + #endif 67 67 + 68 68 + #ifndef __counted_by_ptr 69 69 + #define __counted_by_ptr(m) 70 70 + #endif 71 71 + 72 72 + #define __kernel_nonstring 73 73 + 74 74 + #endif /* _LINUX_STDDEF_H */

tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c

reviewed

··· 112 112 113 113 ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); 114 114 115 115 + ASSERT_TRUE(cubic_skel->bss->nodelay_init_reject, "init reject nodelay option"); 116 116 + ASSERT_TRUE(cubic_skel->bss->nodelay_cwnd_event_tx_start_reject, 117 117 + "cwnd_event_tx_start reject nodelay option"); 118 118 + 115 119 bpf_link__destroy(link); 116 120 bpf_cubic__destroy(cubic_skel); 117 121 }

+16 -1

tools/testing/selftests/bpf/prog_tests/sockopt_sk.c

reviewed

··· 190 190 fd = socket(AF_NETLINK, SOCK_RAW, 0); 191 191 if (fd < 0) { 192 192 log_err("Failed to create AF_NETLINK socket"); 193 193 - return -1; 193 193 + goto err; 194 194 } 195 195 196 196 buf.u32 = 1; ··· 210 210 goto err; 211 211 } 212 212 ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value"); 213 213 + 214 214 + /* Trick bpf_tcp_sock() with IPPROTO_TCP */ 215 215 + close(fd); 216 216 + fd = socket(AF_INET, SOCK_RAW, IPPROTO_TCP); 217 217 + if (!ASSERT_OK_FD(fd, "socket")) 218 218 + goto err; 219 219 + 220 220 + /* The BPF prog intercepts this before the kernel sees it, any 221 221 + * optlen works. Go with 4 bytes for simplicity. 222 222 + */ 223 223 + buf.u32 = 1; 224 224 + optlen = sizeof(buf.u32); 225 225 + err = setsockopt(fd, SOL_TCP, TCP_SAVED_SYN, &buf, optlen); 226 226 + if (!ASSERT_ERR(err, "setsockopt(TCP_SAVED_SYN)")) 227 227 + goto err; 213 228 214 229 free(big_buf); 215 230 close(fd);

tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c

reviewed

··· 507 507 508 508 ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp"); 509 509 510 510 + ASSERT_TRUE(misc_skel->bss->nodelay_est_ok, "nodelay_est_ok"); 511 511 + ASSERT_TRUE(misc_skel->bss->nodelay_hdr_len_reject, "nodelay_hdr_len_reject"); 512 512 + ASSERT_TRUE(misc_skel->bss->nodelay_write_hdr_reject, "nodelay_write_hdr_reject"); 513 513 + 510 514 check_linum: 511 515 ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum"); 512 516 sk_fds_close(&sk_fds);

+14

tools/testing/selftests/bpf/progs/bpf_cubic.c

reviewed

··· 16 16 17 17 #include "bpf_tracing_net.h" 18 18 #include <bpf/bpf_tracing.h> 19 19 + #include <errno.h> 19 20 20 21 char _license[] SEC("license") = "GPL"; 21 22 ··· 171 170 ca->sample_cnt = 0; 172 171 } 173 172 173 173 + bool nodelay_init_reject = false; 174 174 + bool nodelay_cwnd_event_tx_start_reject = false; 175 175 + 174 176 SEC("struct_ops") 175 177 void BPF_PROG(bpf_cubic_init, struct sock *sk) 176 178 { 177 179 struct bpf_bictcp *ca = inet_csk_ca(sk); 180 180 + int true_val = 1, ret; 181 181 + 182 182 + ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); 183 183 + if (ret == -EOPNOTSUPP) 184 184 + nodelay_init_reject = true; 178 185 179 186 bictcp_reset(ca); 180 187 ··· 198 189 { 199 190 struct bpf_bictcp *ca = inet_csk_ca(sk); 200 191 __u32 now = tcp_jiffies32; 192 192 + int true_val = 1, ret; 201 193 __s32 delta; 194 194 + 195 195 + ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); 196 196 + if (ret == -EOPNOTSUPP) 197 197 + nodelay_cwnd_event_tx_start_reject = true; 202 198 203 199 delta = now - tcp_sk(sk)->lsndtime; 204 200

+16

tools/testing/selftests/bpf/progs/sockopt_sk.c

reviewed

··· 149 149 if (sk && sk->family == AF_NETLINK) 150 150 goto out; 151 151 152 152 + if (sk && sk->family == AF_INET && sk->type == SOCK_RAW) { 153 153 + struct bpf_tcp_sock *tp = bpf_tcp_sock(sk); 154 154 + 155 155 + if (tp) { 156 156 + char saved_syn[60]; 157 157 + 158 158 + bpf_getsockopt(sk, SOL_TCP, TCP_SAVED_SYN, 159 159 + &saved_syn, sizeof(saved_syn)); 160 160 + goto consumed; 161 161 + } 162 162 + 163 163 + goto out; 164 164 + } 165 165 + 152 166 /* Make sure bpf_get_netns_cookie is callable. 153 167 */ 154 168 if (bpf_get_netns_cookie(NULL) == 0) ··· 238 224 return 0; /* couldn't get sk storage */ 239 225 240 226 storage->val = optval[0]; 227 227 + 228 228 + consumed: 241 229 ctx->optlen = -1; /* BPF has consumed this option, don't call kernel 242 230 * setsockopt handler. 243 231 */

+14 -1

tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c

reviewed

··· 29 29 unsigned int nr_fin = 0; 30 30 unsigned int nr_hwtstamp = 0; 31 31 32 32 + bool nodelay_est_ok = false; 33 33 + bool nodelay_hdr_len_reject = false; 34 34 + bool nodelay_write_hdr_reject = false; 35 35 + 32 36 /* Check the header received from the active side */ 33 37 static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) 34 38 { ··· 304 300 SEC("sockops") 305 301 int misc_estab(struct bpf_sock_ops *skops) 306 302 { 307 307 - int true_val = 1; 303 303 + int true_val = 1, false_val = 0, ret; 308 304 309 305 switch (skops->op) { 310 306 case BPF_SOCK_OPS_TCP_LISTEN_CB: ··· 320 316 case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: 321 317 return handle_parse_hdr(skops); 322 318 case BPF_SOCK_OPS_HDR_OPT_LEN_CB: 319 319 + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); 320 320 + if (ret == -EOPNOTSUPP) 321 321 + nodelay_hdr_len_reject = true; 323 322 return handle_hdr_opt_len(skops); 324 323 case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: 324 324 + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); 325 325 + if (ret == -EOPNOTSUPP) 326 326 + nodelay_write_hdr_reject = true; 325 327 return handle_write_hdr_opt(skops); 326 328 case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: 329 329 + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &false_val, sizeof(false_val)); 330 330 + if (!ret) 331 331 + nodelay_est_ok = true; 327 332 return handle_passive_estab(skops); 328 333 } 329 334