Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

inet_diag: avoid cache line misses in inet_diag_bc_sk()

inet_diag_bc_sk() pulls five cache lines per socket,
while most filters only need the first two.

Add three booleans to struct inet_diag_dump_data
that are selectively set when a filter needs specific socket fields.

- mark_needed /* INET_DIAG_BC_MARK_COND present. */
- cgroup_needed /* INET_DIAG_BC_CGROUP_COND present. */
- userlocks_needed /* INET_DIAG_BC_AUTO present. */

This removes millions of cache line misses per ss invocation
when simple filters are specified on busy servers.

offsetof(struct sock, sk_userlocks) = 0xf3
offsetof(struct sock, sk_mark) = 0x20c
offsetof(struct sock, sk_cgrp_data) = 0x298

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250828102738.2065992-6-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
95fa7883 9529320a

+36 -21
+5
include/linux/inet_diag.h
··· 38 38 #define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES] 39 39 40 40 struct bpf_sk_storage_diag *bpf_stg_diag; 41 + bool mark_needed; /* INET_DIAG_BC_MARK_COND present. */ 42 + #ifdef CONFIG_SOCK_CGROUP_DATA 43 + bool cgroup_needed; /* INET_DIAG_BC_CGROUP_COND present. */ 44 + #endif 45 + bool userlocks_needed; /* INET_DIAG_BC_AUTO present. */ 41 46 }; 42 47 43 48 struct inet_connection_sock;
+31 -21
net/ipv4/inet_diag.c
··· 605 605 entry.sport = READ_ONCE(inet->inet_num); 606 606 entry.dport = ntohs(READ_ONCE(inet->inet_dport)); 607 607 entry.ifindex = READ_ONCE(sk->sk_bound_dev_if); 608 - entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0; 609 - if (sk_fullsock(sk)) 610 - entry.mark = READ_ONCE(sk->sk_mark); 611 - else if (sk->sk_state == TCP_NEW_SYN_RECV) 612 - entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; 613 - else if (sk->sk_state == TCP_TIME_WAIT) 614 - entry.mark = inet_twsk(sk)->tw_mark; 615 - else 616 - entry.mark = 0; 608 + if (cb_data->userlocks_needed) 609 + entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0; 610 + if (cb_data->mark_needed) { 611 + if (sk_fullsock(sk)) 612 + entry.mark = READ_ONCE(sk->sk_mark); 613 + else if (sk->sk_state == TCP_NEW_SYN_RECV) 614 + entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; 615 + else if (sk->sk_state == TCP_TIME_WAIT) 616 + entry.mark = inet_twsk(sk)->tw_mark; 617 + else 618 + entry.mark = 0; 619 + } 617 620 #ifdef CONFIG_SOCK_CGROUP_DATA 618 - entry.cgroup_id = sk_fullsock(sk) ? 619 - cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0; 621 + if (cb_data->cgroup_needed) 622 + entry.cgroup_id = sk_fullsock(sk) ? 
623 + cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0; 620 624 #endif 621 625 622 626 return inet_diag_bc_run(bc, &entry); ··· 720 716 } 721 717 #endif 722 718 723 - static int inet_diag_bc_audit(const struct nlattr *attr, 719 + static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data, 724 720 const struct sk_buff *skb) 725 721 { 726 - bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); 722 + const struct nlattr *attr = cb_data->inet_diag_nla_bc; 727 723 const void *bytecode, *bc; 728 724 int bytecode_len, len; 725 + bool net_admin; 729 726 730 - if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op)) 727 + if (!attr) 728 + return 0; 729 + 730 + if (nla_len(attr) < sizeof(struct inet_diag_bc_op)) 731 731 return -EINVAL; 732 732 733 + net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); 733 734 bytecode = bc = nla_data(attr); 734 735 len = bytecode_len = nla_len(attr); 735 736 ··· 766 757 return -EPERM; 767 758 if (!valid_markcond(bc, len, &min_len)) 768 759 return -EINVAL; 760 + cb_data->mark_needed = true; 769 761 break; 770 762 #ifdef CONFIG_SOCK_CGROUP_DATA 771 763 case INET_DIAG_BC_CGROUP_COND: 772 764 if (!valid_cgroupcond(bc, len, &min_len)) 773 765 return -EINVAL; 766 + cb_data->cgroup_needed = true; 774 767 break; 775 768 #endif 776 769 case INET_DIAG_BC_AUTO: 770 + cb_data->userlocks_needed = true; 771 + fallthrough; 777 772 case INET_DIAG_BC_JMP: 778 773 case INET_DIAG_BC_NOP: 779 774 break; ··· 854 841 kfree(cb_data); 855 842 return err; 856 843 } 857 - nla = cb_data->inet_diag_nla_bc; 858 - if (nla) { 859 - err = inet_diag_bc_audit(nla, skb); 860 - if (err) { 861 - kfree(cb_data); 862 - return err; 863 - } 844 + err = inet_diag_bc_audit(cb_data, skb); 845 + if (err) { 846 + kfree(cb_data); 847 + return err; 864 848 } 865 849 866 850 nla = cb_data->inet_diag_nla_bpf_stgs;