Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tcp: trace retransmit failures in tcp_retransmit_skb

Background
==========
When TCP retransmits a packet due to missing ACKs, the
retransmission may fail for various reasons (e.g., packets
stuck in driver queues, receiver zero windows, or routing issues).

The original tcp_retransmit_skb tracepoint:

'commit e086101b150a ("tcp: add a tracepoint for tcp retransmission")'

lacks visibility into these failure causes, making production
diagnostics difficult.

Solution
========
Add the return value ("err") to the tcp_retransmit_skb tracepoint.
This lets users see why a TCP retransmission failed and
filter retransmission failures by return value.

Compatibility description
=========================
This patch extends the tcp_retransmit_skb tracepoint
by adding a new "err" field at the end of its
existing structure (within TP_STRUCT__entry). The
compatibility implications are detailed as follows:

1) Structural compatibility for legacy user-space tools
Legacy tools/BPF programs accessing existing fields
(by offset or name) can still work without modification
or recompilation. The new field is appended to the end,
preserving original memory layout.

2) Note: semantic changes
The original tracepoint fired only for successfully
retransmitted packets. With this patch, the tracepoint
also fires for retransmission attempts that terminate
early with an error. For accurate statistics, users
should filter on "err" (0 means success, non-zero means
failure) to distinguish outcomes.

Before the patch:
field:const void * skbaddr; offset:8; size:8; signed:0;
field:const void * skaddr; offset:16; size:8; signed:0;
field:int state; offset:24; size:4; signed:1;
field:__u16 sport; offset:28; size:2; signed:0;
field:__u16 dport; offset:30; size:2; signed:0;
field:__u16 family; offset:32; size:2; signed:0;
field:__u8 saddr[4]; offset:34; size:4; signed:0;
field:__u8 daddr[4]; offset:38; size:4; signed:0;
field:__u8 saddr_v6[16]; offset:42; size:16; signed:0;
field:__u8 daddr_v6[16]; offset:58; size:16; signed:0;

print fmt: "skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s"

After the patch:
field:const void * skbaddr; offset:8; size:8; signed:0;
field:const void * skaddr; offset:16; size:8; signed:0;
field:int state; offset:24; size:4; signed:1;
field:__u16 sport; offset:28; size:2; signed:0;
field:__u16 dport; offset:30; size:2; signed:0;
field:__u16 family; offset:32; size:2; signed:0;
field:__u8 saddr[4]; offset:34; size:4; signed:0;
field:__u8 daddr[4]; offset:38; size:4; signed:0;
field:__u8 saddr_v6[16]; offset:42; size:16; signed:0;
field:__u8 daddr_v6[16]; offset:58; size:16; signed:0;
field:int err; offset:76; size:4; signed:1;

print fmt: "skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s err=%d"

Co-developed-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250721111607626_BDnIJB0ywk6FghN63bor@zte.com.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Fan Yu and committed by
Jakub Kicinski
ad892e91 b2dd6eb0

+38 -35
+9 -18
include/trace/events/tcp.h
··· 13 13 #include <linux/sock_diag.h> 14 14 #include <net/rstreason.h> 15 15 16 - /* 17 - * tcp event with arguments sk and skb 18 - * 19 - * Note: this class requires a valid sk pointer; while skb pointer could 20 - * be NULL. 21 - */ 22 - DECLARE_EVENT_CLASS(tcp_event_sk_skb, 16 + TRACE_EVENT(tcp_retransmit_skb, 23 17 24 - TP_PROTO(const struct sock *sk, const struct sk_buff *skb), 18 + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, int err), 25 19 26 - TP_ARGS(sk, skb), 20 + TP_ARGS(sk, skb, err), 27 21 28 22 TP_STRUCT__entry( 29 23 __field(const void *, skbaddr) ··· 30 36 __array(__u8, daddr, 4) 31 37 __array(__u8, saddr_v6, 16) 32 38 __array(__u8, daddr_v6, 16) 39 + __field(int, err) 33 40 ), 34 41 35 42 TP_fast_assign( ··· 53 58 54 59 TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, 55 60 sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); 61 + 62 + __entry->err = err; 56 63 ), 57 64 58 - TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", 65 + TP_printk("skbaddr=%p skaddr=%p family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s err=%d", 59 66 __entry->skbaddr, __entry->skaddr, 60 67 show_family_name(__entry->family), 61 68 __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, 62 69 __entry->saddr_v6, __entry->daddr_v6, 63 - show_tcp_state_name(__entry->state)) 64 - ); 65 - 66 - DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, 67 - 68 - TP_PROTO(const struct sock *sk, const struct sk_buff *skb), 69 - 70 - TP_ARGS(sk, skb) 70 + show_tcp_state_name(__entry->state), 71 + __entry->err) 71 72 ); 72 73 73 74 #undef FN
+29 -17
net/ipv4/tcp_output.c
··· 3330 3330 if (icsk->icsk_mtup.probe_size) 3331 3331 icsk->icsk_mtup.probe_size = 0; 3332 3332 3333 - if (skb_still_in_host_queue(sk, skb)) 3334 - return -EBUSY; 3333 + if (skb_still_in_host_queue(sk, skb)) { 3334 + err = -EBUSY; 3335 + goto out; 3336 + } 3335 3337 3336 3338 start: 3337 3339 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { ··· 3344 3342 } 3345 3343 if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { 3346 3344 WARN_ON_ONCE(1); 3347 - return -EINVAL; 3345 + err = -EINVAL; 3346 + goto out; 3348 3347 } 3349 - if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) 3350 - return -ENOMEM; 3348 + if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) { 3349 + err = -ENOMEM; 3350 + goto out; 3351 + } 3351 3352 } 3352 3353 3353 - if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) 3354 - return -EHOSTUNREACH; /* Routing failure or similar. */ 3354 + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) { 3355 + err = -EHOSTUNREACH; /* Routing failure or similar. */ 3356 + goto out; 3357 + } 3355 3358 3356 3359 cur_mss = tcp_current_mss(sk); 3357 3360 avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; ··· 3367 3360 * our retransmit of one segment serves as a zero window probe. 3368 3361 */ 3369 3362 if (avail_wnd <= 0) { 3370 - if (TCP_SKB_CB(skb)->seq != tp->snd_una) 3371 - return -EAGAIN; 3363 + if (TCP_SKB_CB(skb)->seq != tp->snd_una) { 3364 + err = -EAGAIN; 3365 + goto out; 3366 + } 3372 3367 avail_wnd = cur_mss; 3373 3368 } 3374 3369 ··· 3382 3373 } 3383 3374 if (skb->len > len) { 3384 3375 if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len, 3385 - cur_mss, GFP_ATOMIC)) 3386 - return -ENOMEM; /* We'll try again later. */ 3376 + cur_mss, GFP_ATOMIC)) { 3377 + err = -ENOMEM; /* We'll try again later. 
*/ 3378 + goto out; 3379 + } 3387 3380 } else { 3388 - if (skb_unclone_keeptruesize(skb, GFP_ATOMIC)) 3389 - return -ENOMEM; 3381 + if (skb_unclone_keeptruesize(skb, GFP_ATOMIC)) { 3382 + err = -ENOMEM; 3383 + goto out; 3384 + } 3390 3385 3391 3386 diff = tcp_skb_pcount(skb); 3392 3387 tcp_set_skb_tso_segs(skb, cur_mss); ··· 3444 3431 tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB, 3445 3432 TCP_SKB_CB(skb)->seq, segs, err); 3446 3433 3447 - if (likely(!err)) { 3448 - trace_tcp_retransmit_skb(sk, skb); 3449 - } else if (err != -EBUSY) { 3434 + if (unlikely(err) && err != -EBUSY) 3450 3435 NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); 3451 - } 3452 3436 3453 3437 /* To avoid taking spuriously low RTT samples based on a timestamp 3454 3438 * for a transmit that never happened, always mark EVER_RETRANS 3455 3439 */ 3456 3440 TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; 3457 3441 3442 + out: 3443 + trace_tcp_retransmit_skb(sk, skb, err); 3458 3444 return err; 3459 3445 } 3460 3446