Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'tcp-move-few-fields-for-data-locality'

Eric Dumazet says:

====================
tcp: move few fields for data locality

After recent additions (PSP and AccECN), I wanted to make another
pass over field locations to increase data locality.

This series manages to shrink TCP and TCPv6 objects by 128 bytes,
but more importantly should reduce the number of cache lines touched
in TCP fast paths.

There is more to come.

v2: removed the tcp CACHELINE_ASSERT_GROUP_SIZE checks after a kernel
build bot reported an error.
====================

Link: https://patch.msgid.link/20250919204856.2977245-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+31 -39
+3 -3
Documentation/networking/net_cachelines/tcp_sock.rst
··· 26 26 u32 dsack_dups 27 27 u32 snd_una read_mostly read_write tcp_wnd_end,tcp_urg_mode,tcp_minshall_check,tcp_cwnd_validate(tx);tcp_ack,tcp_may_update_window,tcp_clean_rtx_queue(write),tcp_ack_tstamp(rx) 28 28 u32 snd_sml read_write tcp_minshall_check,tcp_minshall_update 29 - u32 rcv_tstamp read_mostly tcp_ack 30 - void * tcp_clean_acked read_mostly tcp_ack 29 + u32 rcv_tstamp read_write read_write tcp_ack 30 + void * tcp_clean_acked read_mostly tcp_ack 31 31 u32 lsndtime read_write tcp_slow_start_after_idle_check,tcp_event_data_sent 32 32 u32 last_oow_ack_time 33 33 u32 compressed_ack_rcv_nxt ··· 57 57 u8:2 fastopen_client_fail 58 58 u8:4 nonagle read_write tcp_skb_entail,tcp_push_pending_frames 59 59 u8:1 thin_lto 60 - u8:1 recvmsg_inq 60 + u8:1 recvmsg_inq read_mostly tcp_recvmsg 61 61 u8:1 repair read_mostly tcp_write_xmit 62 62 u8:1 frto 63 63 u8 repair_queue
+10 -10
include/linux/tcp.h
··· 215 215 u16 gso_segs; /* Max number of segs per GSO packet */ 216 216 /* from STCP, retrans queue hinting */ 217 217 struct sk_buff *retransmit_skb_hint; 218 + #if defined(CONFIG_TLS_DEVICE) 219 + void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); 220 + #endif 218 221 __cacheline_group_end(tcp_sock_read_tx); 219 222 220 223 /* TXRX read-mostly hotpath cache lines */ ··· 235 232 repair : 1, 236 233 tcp_usec_ts : 1, /* TSval values in usec */ 237 234 is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ 238 - is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ 235 + is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ 236 + recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ 239 237 __cacheline_group_end(tcp_sock_read_txrx); 240 238 241 239 /* RX read-mostly hotpath cache lines */ 242 240 __cacheline_group_begin(tcp_sock_read_rx); 243 241 u32 copied_seq; /* Head of yet unread data */ 244 - u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ 245 242 u32 snd_wl1; /* Sequence for window update */ 246 243 u32 tlp_high_seq; /* snd_nxt at the time of TLP */ 247 244 u32 rttvar_us; /* smoothed mdev_max */ ··· 249 246 u16 advmss; /* Advertised MSS */ 250 247 u16 urg_data; /* Saved octet of OOB data and control flags */ 251 248 u32 lost; /* Total data packets lost incl. rexmits */ 249 + u32 snd_ssthresh; /* Slow start size threshold */ 252 250 struct minmax rtt_min; 253 251 /* OOO segments go in this rbtree. Socket lock must be held. 
*/ 254 252 struct rb_root out_of_order_queue; 255 - #if defined(CONFIG_TLS_DEVICE) 256 - void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); 257 - #endif 258 - u32 snd_ssthresh; /* Slow start size threshold */ 259 - u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ 260 253 __cacheline_group_end(tcp_sock_read_rx); 261 254 262 255 /* TX read-write hotpath cache lines */ ··· 318 319 */ 319 320 u32 app_limited; /* limited until "delivered" reaches this val */ 320 321 u32 rcv_wnd; /* Current receiver window */ 322 + u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ 321 323 /* 322 324 * Options received (usually on last packet, some only on SYN packets). 323 325 */ ··· 448 448 * the first SYN. */ 449 449 u32 undo_marker; /* snd_una upon a new recovery episode. */ 450 450 int undo_retrans; /* number of undoable retransmissions. */ 451 + u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG 452 + * while socket was owned by user. 453 + */ 451 454 u64 bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans 452 455 * Total data bytes retransmitted 453 456 */ ··· 497 494 u32 probe_seq_end; 498 495 } mtu_probe; 499 496 u32 plb_rehash; /* PLB-triggered rehash attempts */ 500 - u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG 501 - * while socket was owned by user. 502 - */ 503 497 #if IS_ENABLED(CONFIG_MPTCP) 504 498 bool is_mptcp; 505 499 #endif
+1 -1
include/net/request_sock.h
··· 185 185 struct request_sock_queue { 186 186 spinlock_t rskq_lock; 187 187 u8 rskq_defer_accept; 188 + u8 synflood_warned; 188 189 189 - u32 synflood_warned; 190 190 atomic_t qlen; 191 191 atomic_t young; 192 192
+5 -5
include/net/sock.h
··· 467 467 __cacheline_group_begin(sock_write_tx); 468 468 int sk_write_pending; 469 469 atomic_t sk_omem_alloc; 470 - int sk_sndbuf; 470 + int sk_err_soft; 471 471 472 472 int sk_wmem_queued; 473 473 refcount_t sk_wmem_alloc; ··· 492 492 long sk_sndtimeo; 493 493 u32 sk_priority; 494 494 u32 sk_mark; 495 + kuid_t sk_uid; 496 + u16 sk_protocol; 497 + u16 sk_type; 495 498 struct dst_entry __rcu *sk_dst_cache; 496 499 netdev_features_t sk_route_caps; 497 500 #ifdef CONFIG_SOCK_VALIDATE_XMIT ··· 507 504 unsigned int sk_gso_max_size; 508 505 gfp_t sk_allocation; 509 506 u32 sk_txhash; 507 + int sk_sndbuf; 510 508 u8 sk_pacing_shift; 511 509 bool sk_use_task_frag; 512 510 __cacheline_group_end(sock_read_tx); ··· 521 517 sk_no_check_tx : 1, 522 518 sk_no_check_rx : 1; 523 519 u8 sk_shutdown; 524 - u16 sk_type; 525 - u16 sk_protocol; 526 520 unsigned long sk_lingertime; 527 521 struct proto *sk_prot_creator; 528 522 rwlock_t sk_callback_lock; 529 - int sk_err_soft; 530 523 u32 sk_ack_backlog; 531 524 u32 sk_max_ack_backlog; 532 - kuid_t sk_uid; 533 525 unsigned long sk_ino; 534 526 spinlock_t sk_peer_lock; 535 527 int sk_bind_phc;
+4 -1
net/core/sock.c
··· 4452 4452 4453 4453 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc); 4454 4454 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc); 4455 - CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf); 4455 + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_err_soft); 4456 4456 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued); 4457 4457 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc); 4458 4458 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags); ··· 4471 4471 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo); 4472 4472 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority); 4473 4473 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark); 4474 + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_uid); 4475 + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_protocol); 4474 4476 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache); 4475 4477 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps); 4476 4478 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type); 4477 4479 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size); 4478 4480 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation); 4479 4481 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash); 4482 + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndbuf); 4480 4483 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs); 4481 4484 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift); 4482 4485 CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);
+4 -16
net/ipv4/tcp.c
··· 5101 5101 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat); 5102 5102 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs); 5103 5103 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint); 5104 - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 32); 5104 + #if IS_ENABLED(CONFIG_TLS_DEVICE) 5105 + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, tcp_clean_acked); 5106 + #endif 5105 5107 5106 5108 /* TXRX read-mostly hotpath cache lines */ 5107 5109 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset); ··· 5114 5112 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out); 5115 5113 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out); 5116 5114 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio); 5117 - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32); 5118 5115 5119 5116 /* RX read-mostly hotpath cache lines */ 5120 5117 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq); 5121 - CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp); 5122 5118 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1); 5123 5119 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq); 5124 5120 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us); ··· 5127 5127 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min); 5128 5128 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue); 5129 5129 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh); 5130 - #if IS_ENABLED(CONFIG_TLS_DEVICE) 5131 - CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tcp_clean_acked); 5132 - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 77); 5133 - #else 5134 - 
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69); 5135 - #endif 5136 5130 5137 5131 /* TX read-write hotpath cache lines */ 5138 5132 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out); ··· 5145 5151 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue); 5146 5152 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack); 5147 5153 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags); 5148 - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 97); 5149 5154 5150 5155 /* TXRX read-write hotpath cache lines */ 5151 5156 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags); ··· 5163 5170 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes); 5164 5171 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited); 5165 5172 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd); 5173 + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_tstamp); 5166 5174 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt); 5167 - 5168 - /* 32bit arches with 8byte alignment on u64 fields might need padding 5169 - * before tcp_clock_cache. 5170 - */ 5171 - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 107 + 4); 5172 5175 5173 5176 /* RX read-write hotpath cache lines */ 5174 5177 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received); ··· 5182 5193 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked); 5183 5194 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est); 5184 5195 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space); 5185 - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 112); 5186 5196 } 5187 5197 5188 5198 void __init tcp_init(void)
+4 -3
net/ipv4/tcp_input.c
··· 4085 4085 /* We passed data and got it acked, remove any soft error 4086 4086 * log. Something worked... 4087 4087 */ 4088 - WRITE_ONCE(sk->sk_err_soft, 0); 4088 + if (READ_ONCE(sk->sk_err_soft)) 4089 + WRITE_ONCE(sk->sk_err_soft, 0); 4089 4090 WRITE_ONCE(icsk->icsk_probes_out, 0); 4090 4091 tp->rcv_tstamp = tcp_jiffies32; 4091 4092 if (!prior_packets) ··· 7282 7281 #endif 7283 7282 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 7284 7283 7285 - if (!READ_ONCE(queue->synflood_warned) && syncookies != 2 && 7286 - xchg(&queue->synflood_warned, 1) == 0) { 7284 + if (syncookies != 2 && !READ_ONCE(queue->synflood_warned)) { 7285 + WRITE_ONCE(queue->synflood_warned, 1); 7287 7286 if (IS_ENABLED(CONFIG_IPV6) && sk->sk_family == AF_INET6) { 7288 7287 net_info_ratelimited("%s: Possible SYN flooding on port [%pI6c]:%u. %s.\n", 7289 7288 proto, inet6_rcv_saddr(sk),