Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'inet_diag-make-dumps-faster-with-simple-filters'

Eric Dumazet says:

====================
inet_diag: make dumps faster with simple filters

inet_diag_bc_sk() pulls five cache lines per socket,
while most filters only need the first two.

We can change it to only pull needed cache lines,
to make things like "ss -temoi src :21456" much faster.

The first patches (1-3) annotate data races as a preliminary step.
====================

Link: https://patch.msgid.link/20250828102738.2065992-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+70 -71
+6 -1
include/linux/inet_diag.h
··· 38 38 #define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES] 39 39 40 40 struct bpf_sk_storage_diag *bpf_stg_diag; 41 + bool mark_needed; /* INET_DIAG_BC_MARK_COND present. */ 42 + #ifdef CONFIG_SOCK_CGROUP_DATA 43 + bool cgroup_needed; /* INET_DIAG_BC_CGROUP_COND present. */ 44 + #endif 45 + bool userlocks_needed; /* INET_DIAG_BC_AUTO present. */ 41 46 }; 42 47 43 48 struct inet_connection_sock; ··· 51 46 const struct inet_diag_req_v2 *req, 52 47 u16 nlmsg_flags, bool net_admin); 53 48 54 - int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); 49 + int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk); 55 50 56 51 void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); 57 52
+48 -37
net/ipv4/inet_diag.c
··· 71 71 72 72 void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) 73 73 { 74 - r->idiag_family = sk->sk_family; 74 + r->idiag_family = READ_ONCE(sk->sk_family); 75 75 76 - r->id.idiag_sport = htons(sk->sk_num); 77 - r->id.idiag_dport = sk->sk_dport; 78 - r->id.idiag_if = sk->sk_bound_dev_if; 76 + r->id.idiag_sport = htons(READ_ONCE(sk->sk_num)); 77 + r->id.idiag_dport = READ_ONCE(sk->sk_dport); 78 + r->id.idiag_if = READ_ONCE(sk->sk_bound_dev_if); 79 79 sock_diag_save_cookie(sk, r->id.idiag_cookie); 80 80 81 81 #if IS_ENABLED(CONFIG_IPV6) 82 - if (sk->sk_family == AF_INET6) { 83 - *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; 84 - *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; 82 + if (r->idiag_family == AF_INET6) { 83 + data_race(*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr); 84 + data_race(*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr); 85 85 } else 86 86 #endif 87 87 { 88 88 memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); 89 89 memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); 90 90 91 - r->id.idiag_src[0] = sk->sk_rcv_saddr; 92 - r->id.idiag_dst[0] = sk->sk_daddr; 91 + r->id.idiag_src[0] = READ_ONCE(sk->sk_rcv_saddr); 92 + r->id.idiag_dst[0] = READ_ONCE(sk->sk_daddr); 93 93 } 94 94 } 95 95 EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); ··· 580 580 const struct sock *sk) 581 581 { 582 582 #if IS_ENABLED(CONFIG_IPV6) 583 - if (sk->sk_family == AF_INET6) { 583 + if (entry->family == AF_INET6) { 584 584 entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32; 585 585 entry->daddr = sk->sk_v6_daddr.s6_addr32; 586 586 } else ··· 591 591 } 592 592 } 593 593 594 - int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) 594 + int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk) 595 595 { 596 - struct inet_sock *inet = inet_sk(sk); 596 + const struct nlattr *bc = cb_data->inet_diag_nla_bc; 597 + const struct inet_sock *inet = inet_sk(sk); 597 598 struct inet_diag_entry entry; 
598 599 599 600 if (!bc) 600 601 return 1; 601 602 602 - entry.family = sk->sk_family; 603 + entry.family = READ_ONCE(sk->sk_family); 603 604 entry_fill_addrs(&entry, sk); 604 - entry.sport = inet->inet_num; 605 - entry.dport = ntohs(inet->inet_dport); 606 - entry.ifindex = sk->sk_bound_dev_if; 607 - entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; 608 - if (sk_fullsock(sk)) 609 - entry.mark = READ_ONCE(sk->sk_mark); 610 - else if (sk->sk_state == TCP_NEW_SYN_RECV) 611 - entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; 612 - else if (sk->sk_state == TCP_TIME_WAIT) 613 - entry.mark = inet_twsk(sk)->tw_mark; 614 - else 615 - entry.mark = 0; 605 + entry.sport = READ_ONCE(inet->inet_num); 606 + entry.dport = ntohs(READ_ONCE(inet->inet_dport)); 607 + entry.ifindex = READ_ONCE(sk->sk_bound_dev_if); 608 + if (cb_data->userlocks_needed) 609 + entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0; 610 + if (cb_data->mark_needed) { 611 + if (sk_fullsock(sk)) 612 + entry.mark = READ_ONCE(sk->sk_mark); 613 + else if (sk->sk_state == TCP_NEW_SYN_RECV) 614 + entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; 615 + else if (sk->sk_state == TCP_TIME_WAIT) 616 + entry.mark = inet_twsk(sk)->tw_mark; 617 + else 618 + entry.mark = 0; 619 + } 616 620 #ifdef CONFIG_SOCK_CGROUP_DATA 617 - entry.cgroup_id = sk_fullsock(sk) ? 618 - cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0; 621 + if (cb_data->cgroup_needed) 622 + entry.cgroup_id = sk_fullsock(sk) ? 
623 + cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0; 619 624 #endif 620 625 621 626 return inet_diag_bc_run(bc, &entry); ··· 720 715 } 721 716 #endif 722 717 723 - static int inet_diag_bc_audit(const struct nlattr *attr, 718 + static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data, 724 719 const struct sk_buff *skb) 725 720 { 726 - bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); 721 + const struct nlattr *attr = cb_data->inet_diag_nla_bc; 727 722 const void *bytecode, *bc; 728 723 int bytecode_len, len; 724 + bool net_admin; 729 725 730 - if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op)) 726 + if (!attr) 727 + return 0; 728 + 729 + if (nla_len(attr) < sizeof(struct inet_diag_bc_op)) 731 730 return -EINVAL; 732 731 732 + net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); 733 733 bytecode = bc = nla_data(attr); 734 734 len = bytecode_len = nla_len(attr); 735 735 ··· 766 756 return -EPERM; 767 757 if (!valid_markcond(bc, len, &min_len)) 768 758 return -EINVAL; 759 + cb_data->mark_needed = true; 769 760 break; 770 761 #ifdef CONFIG_SOCK_CGROUP_DATA 771 762 case INET_DIAG_BC_CGROUP_COND: 772 763 if (!valid_cgroupcond(bc, len, &min_len)) 773 764 return -EINVAL; 765 + cb_data->cgroup_needed = true; 774 766 break; 775 767 #endif 776 768 case INET_DIAG_BC_AUTO: 769 + cb_data->userlocks_needed = true; 770 + fallthrough; 777 771 case INET_DIAG_BC_JMP: 778 772 case INET_DIAG_BC_NOP: 779 773 break; ··· 854 840 kfree(cb_data); 855 841 return err; 856 842 } 857 - nla = cb_data->inet_diag_nla_bc; 858 - if (nla) { 859 - err = inet_diag_bc_audit(nla, skb); 860 - if (err) { 861 - kfree(cb_data); 862 - return err; 863 - } 843 + err = inet_diag_bc_audit(cb_data, skb); 844 + if (err) { 845 + kfree(cb_data); 846 + return err; 864 847 } 865 848 866 849 nla = cb_data->inet_diag_nla_bpf_stgs;
+3 -7
net/ipv4/raw_diag.c
··· 126 126 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, 127 127 struct netlink_callback *cb, 128 128 const struct inet_diag_req_v2 *r, 129 - struct nlattr *bc, bool net_admin) 129 + bool net_admin) 130 130 { 131 - if (!inet_diag_bc_sk(bc, sk)) 131 + if (!inet_diag_bc_sk(cb->data, sk)) 132 132 return 0; 133 133 134 134 return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin); ··· 140 140 bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); 141 141 struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); 142 142 struct net *net = sock_net(skb->sk); 143 - struct inet_diag_dump_data *cb_data; 144 143 int num, s_num, slot, s_slot; 145 144 struct hlist_head *hlist; 146 145 struct sock *sk = NULL; 147 - struct nlattr *bc; 148 146 149 147 if (IS_ERR(hashinfo)) 150 148 return; 151 149 152 - cb_data = cb->data; 153 - bc = cb_data->inet_diag_nla_bc; 154 150 s_slot = cb->args[0]; 155 151 num = s_num = cb->args[1]; 156 152 ··· 170 174 if (r->id.idiag_dport != inet->inet_dport && 171 175 r->id.idiag_dport) 172 176 goto next; 173 - if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) 177 + if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0) 174 178 goto out_unlock; 175 179 next: 176 180 num++;
+5 -7
net/ipv4/tcp_diag.c
··· 248 248 inet_diag_msg_common_fill(r, sk); 249 249 r->idiag_state = TCP_SYN_RECV; 250 250 r->idiag_timer = 1; 251 - r->idiag_retrans = reqsk->num_retrans; 251 + r->idiag_retrans = READ_ONCE(reqsk->num_retrans); 252 252 253 253 BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != 254 254 offsetof(struct sock, sk_cookie)); 255 255 256 - tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; 256 + tmo = READ_ONCE(inet_reqsk(sk)->rsk_timer.expires) - jiffies; 257 257 r->idiag_expires = jiffies_delta_to_msecs(tmo); 258 258 r->idiag_rqueue = 0; 259 259 r->idiag_wqueue = 0; ··· 320 320 u32 idiag_states = r->idiag_states; 321 321 struct inet_hashinfo *hashinfo; 322 322 int i, num, s_i, s_num; 323 - struct nlattr *bc; 324 323 struct sock *sk; 325 324 326 325 hashinfo = net->ipv4.tcp_death_row.hashinfo; 327 - bc = cb_data->inet_diag_nla_bc; 328 326 if (idiag_states & TCPF_SYN_RECV) 329 327 idiag_states |= TCPF_NEW_SYN_RECV; 330 328 s_i = cb->args[1]; ··· 363 365 r->id.idiag_sport) 364 366 goto next_listen; 365 367 366 - if (!inet_diag_bc_sk(bc, sk)) 368 + if (!inet_diag_bc_sk(cb_data, sk)) 367 369 goto next_listen; 368 370 369 371 if (inet_sk_diag_fill(sk, inet_csk(sk), skb, ··· 430 432 r->sdiag_family != sk->sk_family) 431 433 goto next_bind; 432 434 433 - if (!inet_diag_bc_sk(bc, sk)) 435 + if (!inet_diag_bc_sk(cb_data, sk)) 434 436 goto next_bind; 435 437 436 438 sock_hold(sk); ··· 517 519 goto next_normal; 518 520 twsk_build_assert(); 519 521 520 - if (!inet_diag_bc_sk(bc, sk)) 522 + if (!inet_diag_bc_sk(cb_data, sk)) 521 523 goto next_normal; 522 524 523 525 if (!refcount_inc_not_zero(&sk->sk_refcnt))
+1 -1
net/ipv4/tcp_output.c
··· 4438 4438 tcp_sk_rw(sk)->total_retrans++; 4439 4439 } 4440 4440 trace_tcp_retransmit_synack(sk, req); 4441 - req->num_retrans++; 4441 + WRITE_ONCE(req->num_retrans, req->num_retrans + 1); 4442 4442 } 4443 4443 return res; 4444 4444 }
+3 -7
net/ipv4/udp_diag.c
··· 16 16 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, 17 17 struct netlink_callback *cb, 18 18 const struct inet_diag_req_v2 *req, 19 - struct nlattr *bc, bool net_admin) 19 + bool net_admin) 20 20 { 21 - if (!inet_diag_bc_sk(bc, sk)) 21 + if (!inet_diag_bc_sk(cb->data, sk)) 22 22 return 0; 23 23 24 24 return inet_sk_diag_fill(sk, NULL, skb, cb, req, NLM_F_MULTI, ··· 92 92 { 93 93 bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); 94 94 struct net *net = sock_net(skb->sk); 95 - struct inet_diag_dump_data *cb_data; 96 95 int num, s_num, slot, s_slot; 97 - struct nlattr *bc; 98 96 99 - cb_data = cb->data; 100 - bc = cb_data->inet_diag_nla_bc; 101 97 s_slot = cb->args[0]; 102 98 num = s_num = cb->args[1]; 103 99 ··· 126 130 r->id.idiag_dport) 127 131 goto next; 128 132 129 - if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) { 133 + if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0) { 130 134 spin_unlock_bh(&hslot->lock); 131 135 goto done; 132 136 }
+4 -11
net/mptcp/mptcp_diag.c
··· 15 15 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, 16 16 struct netlink_callback *cb, 17 17 const struct inet_diag_req_v2 *req, 18 - struct nlattr *bc, bool net_admin) 18 + bool net_admin) 19 19 { 20 - if (!inet_diag_bc_sk(bc, sk)) 20 + if (!inet_diag_bc_sk(cb->data, sk)) 21 21 return 0; 22 22 23 23 return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, req, NLM_F_MULTI, ··· 76 76 const struct inet_diag_req_v2 *r, 77 77 bool net_admin) 78 78 { 79 - struct inet_diag_dump_data *cb_data = cb->data; 80 79 struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx; 81 - struct nlattr *bc = cb_data->inet_diag_nla_bc; 82 80 struct net *net = sock_net(skb->sk); 83 81 struct inet_hashinfo *hinfo; 84 82 int i; ··· 119 121 if (!refcount_inc_not_zero(&sk->sk_refcnt)) 120 122 goto next_listen; 121 123 122 - ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin); 124 + ret = sk_diag_dump(sk, skb, cb, r, net_admin); 123 125 124 126 sock_put(sk); 125 127 ··· 152 154 bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); 153 155 struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx; 154 156 struct net *net = sock_net(skb->sk); 155 - struct inet_diag_dump_data *cb_data; 156 157 struct mptcp_sock *msk; 157 - struct nlattr *bc; 158 158 159 159 BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx)); 160 - 161 - cb_data = cb->data; 162 - bc = cb_data->inet_diag_nla_bc; 163 160 164 161 while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot, 165 162 &diag_ctx->s_num)) != NULL) { ··· 174 181 r->id.idiag_dport) 175 182 goto next; 176 183 177 - ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin); 184 + ret = sk_diag_dump(sk, skb, cb, r, net_admin); 178 185 next: 179 186 sock_put(sk); 180 187 if (ret < 0) {