Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tcp: try to avoid safer when ACKs are thinned

Add an EWMA of newly acked packets. When ACK thinning occurs, use it
to select between the safer and the unsafe cep delta in AccECN
processing. If the number of packets ACKed per ACK tends to be large,
don't conservatively assume ACE field overflow.

This patch uses the existing 2-byte holes in the rx group for new
u16 variables without creating more holes. Below are the pahole
outcomes before and after this patch:

[BEFORE THIS PATCH]
struct tcp_sock {
[...]
u32 delivered_ecn_bytes[3]; /* 2744 12 */
/* XXX 4 bytes hole, try to pack */

[...]
__cacheline_group_end__tcp_sock_write_rx[0]; /* 2816 0 */

[...]
/* size: 3264, cachelines: 51, members: 177 */
}

[AFTER THIS PATCH]
struct tcp_sock {
[...]
u32 delivered_ecn_bytes[3]; /* 2744 12 */
u16 pkts_acked_ewma; /* 2756 2 */
/* XXX 2 bytes hole, try to pack */

[...]
__cacheline_group_end__tcp_sock_write_rx[0]; /* 2816 0 */

[...]
/* size: 3264, cachelines: 51, members: 178 */
}

Signed-off-by: Ilpo Järvinen <ij@kernel.org>
Co-developed-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260131222515.8485-2-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Ilpo Järvinen and committed by
Paolo Abeni
7885ce01 72163a19

+23 -1
+1
Documentation/networking/net_cachelines/tcp_sock.rst
··· 105 105 u32[3] received_ecn_bytes read_mostly read_write 106 106 u8:4 received_ce_pending read_mostly read_write 107 107 u32[3] delivered_ecn_bytes read_write 108 + u16 pkts_acked_ewma read_write 108 109 u8:2 syn_ect_snt write_mostly read_write 109 110 u8:2 syn_ect_rcv read_mostly read_write 110 111 u8:2 accecn_minlen write_mostly read_write
+1
include/linux/tcp.h
··· 342 342 u32 rate_interval_us; /* saved rate sample: time elapsed */ 343 343 u32 rcv_rtt_last_tsecr; 344 344 u32 delivered_ecn_bytes[3]; 345 + u16 pkts_acked_ewma;/* Pkts acked EWMA for AccECN cep heuristic */ 345 346 u64 first_tx_mstamp; /* start of window send phase */ 346 347 u64 delivered_mstamp; /* time we reached "delivered" */ 347 348 u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
+2
net/ipv4/tcp.c
··· 3470 3470 tcp_accecn_init_counters(tp); 3471 3471 tp->prev_ecnfield = 0; 3472 3472 tp->accecn_opt_tstamp = 0; 3473 + tp->pkts_acked_ewma = 0; 3473 3474 if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release) 3474 3475 icsk->icsk_ca_ops->release(sk); 3475 3476 memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); ··· 5244 5243 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us); 5245 5244 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr); 5246 5245 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_ecn_bytes); 5246 + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, pkts_acked_ewma); 5247 5247 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp); 5248 5248 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp); 5249 5249 CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
+19 -1
net/ipv4/tcp_input.c
··· 488 488 tcp_count_delivered_ce(tp, delivered); 489 489 } 490 490 491 + #define PKTS_ACKED_WEIGHT 6 492 + #define PKTS_ACKED_PREC 6 493 + #define ACK_COMP_THRESH 4 494 + 491 495 /* Returns the ECN CE delta */ 492 496 static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb, 493 497 u32 delivered_pkts, u32 delivered_bytes, ··· 503 499 u32 delta, safe_delta, d_ceb; 504 500 bool opt_deltas_valid; 505 501 u32 corrected_ace; 502 + u32 ewma; 506 503 507 504 /* Reordered ACK or uncertain due to lack of data to send and ts */ 508 505 if (!(flag & (FLAG_FORWARD_PROGRESS | FLAG_TS_PROGRESS))) ··· 511 506 512 507 opt_deltas_valid = tcp_accecn_process_option(tp, skb, 513 508 delivered_bytes, flag); 509 + 510 + if (delivered_pkts) { 511 + if (!tp->pkts_acked_ewma) { 512 + ewma = delivered_pkts << PKTS_ACKED_PREC; 513 + } else { 514 + ewma = tp->pkts_acked_ewma; 515 + ewma = (((ewma << PKTS_ACKED_WEIGHT) - ewma) + 516 + (delivered_pkts << PKTS_ACKED_PREC)) >> 517 + PKTS_ACKED_WEIGHT; 518 + } 519 + tp->pkts_acked_ewma = min_t(u32, ewma, 0xFFFFU); 520 + } 514 521 515 522 if (!(flag & FLAG_SLOWPATH)) { 516 523 /* AccECN counter might overflow on large ACKs */ ··· 572 555 if (d_ceb < 573 556 safe_delta * tp->mss_cache >> TCP_ACCECN_SAFETY_SHIFT) 574 557 return delta; 575 - } 558 + } else if (tp->pkts_acked_ewma > (ACK_COMP_THRESH << PKTS_ACKED_PREC)) 559 + return delta; 576 560 577 561 return safe_delta; 578 562 }