Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ipv6: Move ipv6_fl_list from ipv6_pinfo to inet_sock.

In {tcp6,udp6,raw6}_sock, struct ipv6_pinfo is always placed at
the beginning of a new cache line because

1. __alignof__(struct tcp_sock) is 64 due to ____cacheline_aligned
of __cacheline_group_begin(tcp_sock_write_tx)

2. __alignof__(struct udp_sock) is 64 due to ____cacheline_aligned
of struct numa_drop_counters

3. in raw6_sock, struct numa_drop_counters is placed before
struct ipv6_pinfo

struct ipv6_pinfo is 136 bytes, but its last cache line is
used only by ipv6_fl_list:

$ pahole -C ipv6_pinfo vmlinux
struct ipv6_pinfo {
...
/* --- cacheline 2 boundary (128 bytes) --- */
struct ipv6_fl_socklist * ipv6_fl_list; /* 128 8 */

/* size: 136, cachelines: 3, members: 23 */

Let's move ipv6_fl_list from struct ipv6_pinfo to struct inet_sock
to save a full cache line for {tcp6,udp6,raw6}_sock.

Now, struct ipv6_pinfo is 128 bytes, and {tcp6,udp6,raw6}_sock are each
64 bytes smaller, while {tcp,udp,raw}_sock retain the same size.

Before:

# grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}'
RAWv6 1408
UDPv6 1472
TCPv6 2560
RAW 1152
UDP 1280
TCP 2368

After:

# grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}'
RAWv6 1344
UDPv6 1408
TCPv6 2496
RAW 1152
UDP 1280
TCP 2368

Also, ipv6_fl_list and inet_flags (SNDFLOW bit) are placed in the
same cache line.

$ pahole -C inet_sock vmlinux
...
/* --- cacheline 11 boundary (704 bytes) was 56 bytes ago --- */
struct ipv6_pinfo * pinet6; /* 760 8 */
/* --- cacheline 12 boundary (768 bytes) --- */
struct ipv6_fl_socklist * ipv6_fl_list; /* 768 8 */
unsigned long inet_flags; /* 776 8 */

The doc churn is because the Type column was one character too narrow for "struct ipv6_fl_socklist*", so the whole table had to be re-aligned.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251014224210.2964778-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Kuniyuki Iwashima and committed by
Jakub Kicinski
1c17f437 0746da01

+76 -74
+40 -39
Documentation/networking/net_cachelines/inet_sock.rst
··· 5 5 inet_sock struct fast path usage breakdown 6 6 ========================================== 7 7 8 - ======================= ===================== =================== =================== ====================================================================================================== 9 - Type Name fastpath_tx_access fastpath_rx_access comment 10 - ======================= ===================== =================== =================== ====================================================================================================== 11 - struct sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data 12 - struct ipv6_pinfo* pinet6 13 - be16 inet_sport read_mostly __tcp_transmit_skb 14 - be32 inet_daddr read_mostly ip_select_ident_segs 15 - be32 inet_rcv_saddr 16 - be16 inet_dport read_mostly __tcp_transmit_skb 17 - u16 inet_num 18 - be32 inet_saddr 19 - s16 uc_ttl read_mostly __ip_queue_xmit/ip_select_ttl 20 - u16 cmsg_flags 21 - struct ip_options_rcu* inet_opt read_mostly __ip_queue_xmit 22 - u16 inet_id read_mostly ip_select_ident_segs 23 - u8 tos read_mostly ip_queue_xmit 24 - u8 min_ttl 25 - u8 mc_ttl 26 - u8 pmtudisc 27 - u8:1 recverr 28 - u8:1 is_icsk 29 - u8:1 freebind 30 - u8:1 hdrincl 31 - u8:1 mc_loop 32 - u8:1 transparent 33 - u8:1 mc_all 34 - u8:1 nodefrag 35 - u8:1 bind_address_no_port 36 - u8:1 recverr_rfc4884 37 - u8:1 defer_connect read_mostly tcp_sendmsg_fastopen 38 - u8 rcv_tos 39 - u8 convert_csum 40 - int uc_index 41 - int mc_index 42 - be32 mc_addr 43 - struct ip_mc_socklist* mc_list 44 - struct inet_cork_full cork read_mostly __tcp_transmit_skb 45 - struct local_port_range 46 - ======================= ===================== =================== =================== ====================================================================================================== 8 + ======================== ===================== =================== =================== 
====================================================================================================== 9 + Type Name fastpath_tx_access fastpath_rx_access comment 10 + ======================== ===================== =================== =================== ====================================================================================================== 11 + struct sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data 12 + struct ipv6_pinfo* pinet6 13 + struct ipv6_fl_socklist* ipv6_fl_list read_mostly tcp_v6_connect,__ip6_datagram_connect,udpv6_sendmsg,rawv6_sendmsg 14 + be16 inet_sport read_mostly __tcp_transmit_skb 15 + be32 inet_daddr read_mostly ip_select_ident_segs 16 + be32 inet_rcv_saddr 17 + be16 inet_dport read_mostly __tcp_transmit_skb 18 + u16 inet_num 19 + be32 inet_saddr 20 + s16 uc_ttl read_mostly __ip_queue_xmit/ip_select_ttl 21 + u16 cmsg_flags 22 + struct ip_options_rcu* inet_opt read_mostly __ip_queue_xmit 23 + u16 inet_id read_mostly ip_select_ident_segs 24 + u8 tos read_mostly ip_queue_xmit 25 + u8 min_ttl 26 + u8 mc_ttl 27 + u8 pmtudisc 28 + u8:1 recverr 29 + u8:1 is_icsk 30 + u8:1 freebind 31 + u8:1 hdrincl 32 + u8:1 mc_loop 33 + u8:1 transparent 34 + u8:1 mc_all 35 + u8:1 nodefrag 36 + u8:1 bind_address_no_port 37 + u8:1 recverr_rfc4884 38 + u8:1 defer_connect read_mostly tcp_sendmsg_fastopen 39 + u8 rcv_tos 40 + u8 convert_csum 41 + int uc_index 42 + int mc_index 43 + be32 mc_addr 44 + struct ip_mc_socklist* mc_list 45 + struct inet_cork_full cork read_mostly __tcp_transmit_skb 46 + struct local_port_range 47 + ======================== ===================== =================== =================== ======================================================================================================
+2 -2
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
··· 1199 1199 struct ipv6_pinfo *newnp = inet6_sk(newsk); 1200 1200 struct ipv6_pinfo *np = inet6_sk(lsk); 1201 1201 1202 - inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; 1202 + newinet->pinet6 = &newtcp6sk->inet6; 1203 + newinet->ipv6_fl_list = NULL; 1203 1204 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1204 1205 newsk->sk_v6_daddr = treq->ir_v6_rmt_addr; 1205 1206 newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr; 1206 1207 inet6_sk(newsk)->saddr = treq->ir_v6_loc_addr; 1207 - newnp->ipv6_fl_list = NULL; 1208 1208 newnp->pktoptions = NULL; 1209 1209 newsk->sk_bound_dev_if = treq->ir_iif; 1210 1210 newinet->inet_opt = NULL;
-1
include/linux/ipv6.h
··· 271 271 272 272 struct ipv6_mc_socklist __rcu *ipv6_mc_list; 273 273 struct ipv6_ac_socklist *ipv6_ac_list; 274 - struct ipv6_fl_socklist __rcu *ipv6_fl_list; 275 274 }; 276 275 277 276 /* We currently use available bits from inet_sk(sk)->inet_flags,
+1
include/net/inet_sock.h
··· 214 214 struct sock sk; 215 215 #if IS_ENABLED(CONFIG_IPV6) 216 216 struct ipv6_pinfo *pinet6; 217 + struct ipv6_fl_socklist __rcu *ipv6_fl_list; 217 218 #endif 218 219 /* Socket demultiplex comparisons on incoming packets. */ 219 220 #define inet_daddr sk.__sk_common.skc_daddr
+21 -23
net/ipv6/ip6_flowlabel.c
··· 66 66 fl != NULL; \ 67 67 fl = rcu_dereference(fl->next)) 68 68 69 - #define for_each_sk_fl_rcu(np, sfl) \ 70 - for (sfl = rcu_dereference(np->ipv6_fl_list); \ 69 + #define for_each_sk_fl_rcu(sk, sfl) \ 70 + for (sfl = rcu_dereference(inet_sk(sk)->ipv6_fl_list); \ 71 71 sfl != NULL; \ 72 72 sfl = rcu_dereference(sfl->next)) 73 73 ··· 262 262 struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label) 263 263 { 264 264 struct ipv6_fl_socklist *sfl; 265 - struct ipv6_pinfo *np = inet6_sk(sk); 266 265 267 266 label &= IPV6_FLOWLABEL_MASK; 268 267 269 268 rcu_read_lock(); 270 - for_each_sk_fl_rcu(np, sfl) { 269 + for_each_sk_fl_rcu(sk, sfl) { 271 270 struct ip6_flowlabel *fl = sfl->fl; 272 271 273 272 if (fl->label == label && atomic_inc_not_zero(&fl->users)) { ··· 282 283 283 284 void fl6_free_socklist(struct sock *sk) 284 285 { 285 - struct ipv6_pinfo *np = inet6_sk(sk); 286 + struct inet_sock *inet = inet_sk(sk); 286 287 struct ipv6_fl_socklist *sfl; 287 288 288 - if (!rcu_access_pointer(np->ipv6_fl_list)) 289 + if (!rcu_access_pointer(inet->ipv6_fl_list)) 289 290 return; 290 291 291 292 spin_lock_bh(&ip6_sk_fl_lock); 292 - while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, 293 + while ((sfl = rcu_dereference_protected(inet->ipv6_fl_list, 293 294 lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { 294 - np->ipv6_fl_list = sfl->next; 295 + inet->ipv6_fl_list = sfl->next; 295 296 spin_unlock_bh(&ip6_sk_fl_lock); 296 297 297 298 fl_release(sfl->fl); ··· 469 470 470 471 static int mem_check(struct sock *sk) 471 472 { 472 - struct ipv6_pinfo *np = inet6_sk(sk); 473 - struct ipv6_fl_socklist *sfl; 474 473 int room = FL_MAX_SIZE - atomic_read(&fl_size); 474 + struct ipv6_fl_socklist *sfl; 475 475 int count = 0; 476 476 477 477 if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) 478 478 return 0; 479 479 480 480 rcu_read_lock(); 481 - for_each_sk_fl_rcu(np, sfl) 481 + for_each_sk_fl_rcu(sk, sfl) 482 482 count++; 483 483 rcu_read_unlock(); 484 484 ··· 490 492 return 0; 
491 493 } 492 494 493 - static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, 494 - struct ip6_flowlabel *fl) 495 + static inline void fl_link(struct sock *sk, struct ipv6_fl_socklist *sfl, 496 + struct ip6_flowlabel *fl) 495 497 { 498 + struct inet_sock *inet = inet_sk(sk); 499 + 496 500 spin_lock_bh(&ip6_sk_fl_lock); 497 501 sfl->fl = fl; 498 - sfl->next = np->ipv6_fl_list; 499 - rcu_assign_pointer(np->ipv6_fl_list, sfl); 502 + sfl->next = inet->ipv6_fl_list; 503 + rcu_assign_pointer(inet->ipv6_fl_list, sfl); 500 504 spin_unlock_bh(&ip6_sk_fl_lock); 501 505 } 502 506 ··· 520 520 521 521 rcu_read_lock(); 522 522 523 - for_each_sk_fl_rcu(np, sfl) { 523 + for_each_sk_fl_rcu(sk, sfl) { 524 524 if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { 525 525 spin_lock_bh(&ip6_fl_lock); 526 526 freq->flr_label = sfl->fl->label; ··· 559 559 } 560 560 561 561 spin_lock_bh(&ip6_sk_fl_lock); 562 - for (sflp = &np->ipv6_fl_list; 562 + for (sflp = &inet_sk(sk)->ipv6_fl_list; 563 563 (sfl = socklist_dereference(*sflp)) != NULL; 564 564 sflp = &sfl->next) { 565 565 if (sfl->fl->label == freq->flr_label) ··· 579 579 580 580 static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq) 581 581 { 582 - struct ipv6_pinfo *np = inet6_sk(sk); 583 582 struct net *net = sock_net(sk); 584 583 struct ipv6_fl_socklist *sfl; 585 584 int err; 586 585 587 586 rcu_read_lock(); 588 - for_each_sk_fl_rcu(np, sfl) { 587 + for_each_sk_fl_rcu(sk, sfl) { 589 588 if (sfl->fl->label == freq->flr_label) { 590 589 err = fl6_renew(sfl->fl, freq->flr_linger, 591 590 freq->flr_expires); ··· 613 614 { 614 615 struct ipv6_fl_socklist *sfl, *sfl1 = NULL; 615 616 struct ip6_flowlabel *fl, *fl1 = NULL; 616 - struct ipv6_pinfo *np = inet6_sk(sk); 617 617 struct net *net = sock_net(sk); 618 618 int err; 619 619 ··· 643 645 if (freq->flr_label) { 644 646 err = -EEXIST; 645 647 rcu_read_lock(); 646 - for_each_sk_fl_rcu(np, sfl) { 648 + for_each_sk_fl_rcu(sk, sfl) 
{ 647 649 if (sfl->fl->label == freq->flr_label) { 648 650 if (freq->flr_flags & IPV6_FL_F_EXCL) { 649 651 rcu_read_unlock(); ··· 680 682 fl1->linger = fl->linger; 681 683 if ((long)(fl->expires - fl1->expires) > 0) 682 684 fl1->expires = fl->expires; 683 - fl_link(np, sfl1, fl1); 685 + fl_link(sk, sfl1, fl1); 684 686 fl_free(fl); 685 687 return 0; 686 688 ··· 714 716 } 715 717 } 716 718 717 - fl_link(np, sfl1, fl); 719 + fl_link(sk, sfl1, fl); 718 720 return 0; 719 721 done: 720 722 fl_free(fl);
+7 -6
net/ipv6/tcp_ipv6.c
··· 1386 1386 if (!newsk) 1387 1387 return NULL; 1388 1388 1389 - inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1389 + newinet = inet_sk(newsk); 1390 + newinet->pinet6 = tcp_inet6_sk(newsk); 1391 + newinet->ipv6_fl_list = NULL; 1390 1392 1391 1393 newnp = tcp_inet6_sk(newsk); 1392 1394 newtp = tcp_sk(newsk); ··· 1407 1405 1408 1406 newnp->ipv6_mc_list = NULL; 1409 1407 newnp->ipv6_ac_list = NULL; 1410 - newnp->ipv6_fl_list = NULL; 1411 1408 newnp->pktoptions = NULL; 1412 1409 newnp->opt = NULL; 1413 1410 newnp->mcast_oif = inet_iif(skb); ··· 1454 1453 newsk->sk_gso_type = SKB_GSO_TCPV6; 1455 1454 inet6_sk_rx_dst_set(newsk, skb); 1456 1455 1457 - inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1456 + newinet = inet_sk(newsk); 1457 + newinet->pinet6 = tcp_inet6_sk(newsk); 1458 + newinet->ipv6_fl_list = NULL; 1459 + newinet->inet_opt = NULL; 1458 1460 1459 1461 newtp = tcp_sk(newsk); 1460 - newinet = inet_sk(newsk); 1461 1462 newnp = tcp_inet6_sk(newsk); 1462 1463 1463 1464 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ··· 1472 1469 1473 1470 First: no IPv4 options. 1474 1471 */ 1475 - newinet->inet_opt = NULL; 1476 1472 newnp->ipv6_mc_list = NULL; 1477 1473 newnp->ipv6_ac_list = NULL; 1478 - newnp->ipv6_fl_list = NULL; 1479 1474 1480 1475 /* Clone RX bits */ 1481 1476 newnp->rxopt.all = np->rxopt.all;
+5 -3
net/sctp/ipv6.c
··· 782 782 struct sctp_association *asoc, 783 783 bool kern) 784 784 { 785 - struct sock *newsk; 786 785 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 787 786 struct sctp6_sock *newsctp6sk; 787 + struct inet_sock *newinet; 788 + struct sock *newsk; 788 789 789 790 newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern); 790 791 if (!newsk) ··· 797 796 sock_reset_flag(sk, SOCK_ZAPPED); 798 797 799 798 newsctp6sk = (struct sctp6_sock *)newsk; 800 - inet_sk(newsk)->pinet6 = &newsctp6sk->inet6; 799 + newinet = inet_sk(newsk); 800 + newinet->pinet6 = &newsctp6sk->inet6; 801 + newinet->ipv6_fl_list = NULL; 801 802 802 803 sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped; 803 804 ··· 808 805 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 809 806 newnp->ipv6_mc_list = NULL; 810 807 newnp->ipv6_ac_list = NULL; 811 - newnp->ipv6_fl_list = NULL; 812 808 813 809 sctp_v6_copy_ip_options(sk, newsk); 814 810