Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tcp: add cwnd_event_tx_start to tcp_congestion_ops

(tcp_congestion_ops)->cwnd_event() is called very often, with
@event oscillating between CA_EVENT_TX_START and other values.

This is not branch prediction friendly.

Provide a new cwnd_event_tx_start pointer dedicated for CA_EVENT_TX_START.

Both BBR and CUBIC benefit from this change, since they only care
about CA_EVENT_TX_START.

No meaningful change in kernel size (net -4 bytes):

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 4/4 grow/shrink: 3/1 up/down: 564/-568 (-4)
Function                                     old     new   delta
bbr_cwnd_event_tx_start                        -     450    +450
cubictcp_cwnd_event_tx_start                   -      70     +70
__pfx_cubictcp_cwnd_event_tx_start             -      16     +16
__pfx_bbr_cwnd_event_tx_start                  -      16     +16
tcp_unregister_congestion_control             93      99      +6
tcp_update_congestion_control                518     521      +3
tcp_register_congestion_control              422     425      +3
__tcp_transmit_skb                          3308    3306      -2
__pfx_cubictcp_cwnd_event                     16       -     -16
__pfx_bbr_cwnd_event                          16       -     -16
cubictcp_cwnd_event                           80       -     -80
bbr_cwnd_event                               454       -    -454
Total: Before=25240512, After=25240508, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260323234920.1097858-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Eric Dumazet and committed by Jakub Kicinski.
d1e59a46 112f4c63

+85 -51
+8
include/net/tcp.h
··· 1341 1341 /* call when cwnd event occurs (optional) */ 1342 1342 void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); 1343 1343 1344 + /* call when CA_EVENT_TX_START cwnd event occurs (optional) */ 1345 + void (*cwnd_event_tx_start)(struct sock *sk); 1346 + 1344 1347 /* call when ack arrives (optional) */ 1345 1348 void (*in_ack_event)(struct sock *sk, u32 flags); 1346 1349 ··· 1443 1440 { 1444 1441 const struct inet_connection_sock *icsk = inet_csk(sk); 1445 1442 1443 + if (event == CA_EVENT_TX_START) { 1444 + if (icsk->icsk_ca_ops->cwnd_event_tx_start) 1445 + icsk->icsk_ca_ops->cwnd_event_tx_start(sk); 1446 + return; 1447 + } 1446 1448 if (icsk->icsk_ca_ops->cwnd_event) 1447 1449 icsk->icsk_ca_ops->cwnd_event(sk, event); 1448 1450 }
+5
net/ipv4/bpf_tcp_ca.c
··· 272 272 { 273 273 } 274 274 275 + static void bpf_tcp_ca_cwnd_event_tx_start(struct sock *sk) 276 + { 277 + } 278 + 275 279 static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags) 276 280 { 277 281 } ··· 317 313 .cong_avoid = bpf_tcp_ca_cong_avoid, 318 314 .set_state = bpf_tcp_ca_set_state, 319 315 .cwnd_event = bpf_tcp_ca_cwnd_event, 316 + .cwnd_event_tx_start = bpf_tcp_ca_cwnd_event_tx_start, 320 317 .in_ack_event = bpf_tcp_ca_in_ack_event, 321 318 .pkts_acked = bpf_tcp_ca_pkts_acked, 322 319 .min_tso_segs = bpf_tcp_ca_min_tso_segs,
+4 -4
net/ipv4/tcp_bbr.c
··· 330 330 bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp)); 331 331 } 332 332 333 - __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) 333 + __bpf_kfunc static void bbr_cwnd_event_tx_start(struct sock *sk) 334 334 { 335 335 struct tcp_sock *tp = tcp_sk(sk); 336 336 struct bbr *bbr = inet_csk_ca(sk); 337 337 338 - if (event == CA_EVENT_TX_START && tp->app_limited) { 338 + if (tp->app_limited) { 339 339 bbr->idle_restart = 1; 340 340 bbr->ack_epoch_mstamp = tp->tcp_mstamp; 341 341 bbr->ack_epoch_acked = 0; ··· 1149 1149 .cong_control = bbr_main, 1150 1150 .sndbuf_expand = bbr_sndbuf_expand, 1151 1151 .undo_cwnd = bbr_undo_cwnd, 1152 - .cwnd_event = bbr_cwnd_event, 1152 + .cwnd_event_tx_start = bbr_cwnd_event_tx_start, 1153 1153 .ssthresh = bbr_ssthresh, 1154 1154 .min_tso_segs = bbr_min_tso_segs, 1155 1155 .get_info = bbr_get_info, ··· 1161 1161 BTF_ID_FLAGS(func, bbr_main) 1162 1162 BTF_ID_FLAGS(func, bbr_sndbuf_expand) 1163 1163 BTF_ID_FLAGS(func, bbr_undo_cwnd) 1164 - BTF_ID_FLAGS(func, bbr_cwnd_event) 1164 + BTF_ID_FLAGS(func, bbr_cwnd_event_tx_start) 1165 1165 BTF_ID_FLAGS(func, bbr_ssthresh) 1166 1166 BTF_ID_FLAGS(func, bbr_min_tso_segs) 1167 1167 BTF_ID_FLAGS(func, bbr_set_state)
+14 -17
net/ipv4/tcp_cubic.c
··· 139 139 tcp_sk(sk)->snd_ssthresh = initial_ssthresh; 140 140 } 141 141 142 - __bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) 142 + __bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk) 143 143 { 144 - if (event == CA_EVENT_TX_START) { 145 - struct bictcp *ca = inet_csk_ca(sk); 146 - u32 now = tcp_jiffies32; 147 - s32 delta; 144 + struct bictcp *ca = inet_csk_ca(sk); 145 + u32 now = tcp_jiffies32; 146 + s32 delta; 148 147 149 - delta = now - tcp_sk(sk)->lsndtime; 148 + delta = now - tcp_sk(sk)->lsndtime; 150 149 151 - /* We were application limited (idle) for a while. 152 - * Shift epoch_start to keep cwnd growth to cubic curve. 153 - */ 154 - if (ca->epoch_start && delta > 0) { 155 - ca->epoch_start += delta; 156 - if (after(ca->epoch_start, now)) 157 - ca->epoch_start = now; 158 - } 159 - return; 150 + /* We were application limited (idle) for a while. 151 + * Shift epoch_start to keep cwnd growth to cubic curve. 152 + */ 153 + if (ca->epoch_start && delta > 0) { 154 + ca->epoch_start += delta; 155 + if (after(ca->epoch_start, now)) 156 + ca->epoch_start = now; 160 157 } 161 158 } 162 159 ··· 478 481 .cong_avoid = cubictcp_cong_avoid, 479 482 .set_state = cubictcp_state, 480 483 .undo_cwnd = tcp_reno_undo_cwnd, 481 - .cwnd_event = cubictcp_cwnd_event, 484 + .cwnd_event_tx_start = cubictcp_cwnd_event_tx_start, 482 485 .pkts_acked = cubictcp_acked, 483 486 .owner = THIS_MODULE, 484 487 .name = "cubic", ··· 489 492 BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh) 490 493 BTF_ID_FLAGS(func, cubictcp_cong_avoid) 491 494 BTF_ID_FLAGS(func, cubictcp_state) 492 - BTF_ID_FLAGS(func, cubictcp_cwnd_event) 495 + BTF_ID_FLAGS(func, cubictcp_cwnd_event_tx_start) 493 496 BTF_ID_FLAGS(func, cubictcp_acked) 494 497 BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) 495 498
+9 -3
net/ipv4/tcp_dctcp.c
··· 203 203 tcp_plb_update_state_upon_rto(sk, &ca->plb); 204 204 dctcp_react_to_loss(sk); 205 205 break; 206 - case CA_EVENT_TX_START: 207 - tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */ 208 - break; 209 206 default: 210 207 /* Don't care for the rest. */ 211 208 break; 212 209 } 210 + } 211 + 212 + __bpf_kfunc static void dctcp_cwnd_event_tx_start(struct sock *sk) 213 + { 214 + struct dctcp *ca = inet_csk_ca(sk); 215 + 216 + tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */ 213 217 } 214 218 215 219 static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, ··· 256 252 .init = dctcp_init, 257 253 .in_ack_event = dctcp_update_alpha, 258 254 .cwnd_event = dctcp_cwnd_event, 255 + .cwnd_event_tx_start = dctcp_cwnd_event_tx_start, 259 256 .ssthresh = dctcp_ssthresh, 260 257 .cong_avoid = tcp_reno_cong_avoid, 261 258 .undo_cwnd = dctcp_cwnd_undo, ··· 280 275 BTF_ID_FLAGS(func, dctcp_init) 281 276 BTF_ID_FLAGS(func, dctcp_update_alpha) 282 277 BTF_ID_FLAGS(func, dctcp_cwnd_event) 278 + BTF_ID_FLAGS(func, dctcp_cwnd_event_tx_start) 283 279 BTF_ID_FLAGS(func, dctcp_ssthresh) 284 280 BTF_ID_FLAGS(func, dctcp_cwnd_undo) 285 281 BTF_ID_FLAGS(func, dctcp_state)
+7 -2
net/ipv4/tcp_vegas.c
··· 151 151 */ 152 152 void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) 153 153 { 154 - if (event == CA_EVENT_CWND_RESTART || 155 - event == CA_EVENT_TX_START) 154 + if (event == CA_EVENT_CWND_RESTART) 156 155 tcp_vegas_init(sk); 157 156 } 158 157 EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); 158 + 159 + void tcp_vegas_cwnd_event_tx_start(struct sock *sk) 160 + { 161 + tcp_vegas_init(sk); 162 + } 163 + EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event_tx_start); 159 164 160 165 static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) 161 166 {
+1
net/ipv4/tcp_vegas.h
··· 20 20 void tcp_vegas_state(struct sock *sk, u8 ca_state); 21 21 void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample); 22 22 void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); 23 + void tcp_vegas_cwnd_event_tx_start(struct sock *sk); 23 24 size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, 24 25 union tcp_cc_info *info); 25 26
+7 -1
net/ipv4/tcp_veno.c
··· 112 112 */ 113 113 static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) 114 114 { 115 - if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) 115 + if (event == CA_EVENT_CWND_RESTART) 116 116 tcp_veno_init(sk); 117 + } 118 + 119 + static void tcp_veno_cwnd_event_tx_start(struct sock *sk) 120 + { 121 + tcp_veno_init(sk); 117 122 } 118 123 119 124 static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) ··· 218 213 .pkts_acked = tcp_veno_pkts_acked, 219 214 .set_state = tcp_veno_state, 220 215 .cwnd_event = tcp_veno_cwnd_event, 216 + .cwnd_event_tx_start = tcp_veno_cwnd_event_tx_start, 221 217 222 218 .owner = THIS_MODULE, 223 219 .name = "veno",
+1
net/ipv4/tcp_yeah.c
··· 212 212 .cong_avoid = tcp_yeah_cong_avoid, 213 213 .set_state = tcp_vegas_state, 214 214 .cwnd_event = tcp_vegas_cwnd_event, 215 + .cwnd_event_tx_start = tcp_vegas_cwnd_event_tx_start, 215 216 .get_info = tcp_vegas_get_info, 216 217 .pkts_acked = tcp_vegas_pkts_acked, 217 218
+4 -4
tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
··· 23 23 #define TCP_REORDERING (12) 24 24 25 25 extern void cubictcp_init(struct sock *sk) __ksym; 26 - extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; 26 + extern void cubictcp_cwnd_event_tx_start(struct sock *sk) __ksym; 27 27 extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym; 28 28 extern void cubictcp_state(struct sock *sk, __u8 new_state) __ksym; 29 29 extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; ··· 108 108 } 109 109 110 110 SEC("struct_ops") 111 - void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) 111 + void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk) 112 112 { 113 - cubictcp_cwnd_event(sk, event); 113 + cubictcp_cwnd_event_tx_start(sk); 114 114 } 115 115 116 116 SEC("struct_ops") ··· 172 172 .cong_control = (void *)bpf_cubic_cong_control, 173 173 .set_state = (void *)bpf_cubic_state, 174 174 .undo_cwnd = (void *)bpf_cubic_undo_cwnd, 175 - .cwnd_event = (void *)bpf_cubic_cwnd_event, 175 + .cwnd_event_tx_start = (void *)bpf_cubic_cwnd_event_tx_start, 176 176 .pkts_acked = (void *)bpf_cubic_acked, 177 177 .name = "bpf_cc_cubic", 178 178 };
+13 -16
tools/testing/selftests/bpf/progs/bpf_cubic.c
··· 185 185 } 186 186 187 187 SEC("struct_ops") 188 - void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) 188 + void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk) 189 189 { 190 - if (event == CA_EVENT_TX_START) { 191 - struct bpf_bictcp *ca = inet_csk_ca(sk); 192 - __u32 now = tcp_jiffies32; 193 - __s32 delta; 190 + struct bpf_bictcp *ca = inet_csk_ca(sk); 191 + __u32 now = tcp_jiffies32; 192 + __s32 delta; 194 193 195 - delta = now - tcp_sk(sk)->lsndtime; 194 + delta = now - tcp_sk(sk)->lsndtime; 196 195 197 - /* We were application limited (idle) for a while. 198 - * Shift epoch_start to keep cwnd growth to cubic curve. 199 - */ 200 - if (ca->epoch_start && delta > 0) { 201 - ca->epoch_start += delta; 202 - if (after(ca->epoch_start, now)) 203 - ca->epoch_start = now; 204 - } 205 - return; 196 + /* We were application limited (idle) for a while. 197 + * Shift epoch_start to keep cwnd growth to cubic curve. 198 + */ 199 + if (ca->epoch_start && delta > 0) { 200 + ca->epoch_start += delta; 201 + if (after(ca->epoch_start, now)) 202 + ca->epoch_start = now; 206 203 } 207 204 } 208 205 ··· 534 537 .cong_avoid = (void *)bpf_cubic_cong_avoid, 535 538 .set_state = (void *)bpf_cubic_state, 536 539 .undo_cwnd = (void *)bpf_cubic_undo_cwnd, 537 - .cwnd_event = (void *)bpf_cubic_cwnd_event, 540 + .cwnd_event_tx_start = (void *)bpf_cubic_cwnd_event_tx_start, 538 541 .pkts_acked = (void *)bpf_cubic_acked, 539 542 .name = "bpf_cubic", 540 543 };
+12 -4
tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c
··· 8 8 extern void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) __ksym; 9 9 extern u32 bbr_sndbuf_expand(struct sock *sk) __ksym; 10 10 extern u32 bbr_undo_cwnd(struct sock *sk) __ksym; 11 - extern void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; 11 + extern void bbr_cwnd_event_tx_start(struct sock *sk) __ksym; 12 12 extern u32 bbr_ssthresh(struct sock *sk) __ksym; 13 13 extern u32 bbr_min_tso_segs(struct sock *sk) __ksym; 14 14 extern void bbr_set_state(struct sock *sk, u8 new_state) __ksym; ··· 16 16 extern void dctcp_init(struct sock *sk) __ksym; 17 17 extern void dctcp_update_alpha(struct sock *sk, u32 flags) __ksym; 18 18 extern void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) __ksym; 19 + extern void dctcp_cwnd_event_tx_start(struct sock *sk) __ksym; 19 20 extern u32 dctcp_ssthresh(struct sock *sk) __ksym; 20 21 extern u32 dctcp_cwnd_undo(struct sock *sk) __ksym; 21 22 extern void dctcp_state(struct sock *sk, u8 new_state) __ksym; ··· 25 24 extern u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym; 26 25 extern void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) __ksym; 27 26 extern void cubictcp_state(struct sock *sk, u8 new_state) __ksym; 28 - extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; 27 + extern void cubictcp_cwnd_event_tx_start(struct sock *sk) __ksym; 29 28 extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym; 30 29 31 30 SEC("struct_ops") ··· 70 69 SEC("struct_ops") 71 70 void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event) 72 71 { 73 - bbr_cwnd_event(sk, event); 74 72 dctcp_cwnd_event(sk, event); 75 - cubictcp_cwnd_event(sk, event); 73 + } 74 + 75 + SEC("struct_ops") 76 + void BPF_PROG(cwnd_event_tx_start, struct sock *sk) 77 + { 78 + bbr_cwnd_event_tx_start(sk); 79 + dctcp_cwnd_event_tx_start(sk); 80 + cubictcp_cwnd_event_tx_start(sk); 76 81 } 77 82 78 83 SEC("struct_ops") ··· 118 111 
.sndbuf_expand = (void *)sndbuf_expand, 119 112 .undo_cwnd = (void *)undo_cwnd, 120 113 .cwnd_event = (void *)cwnd_event, 114 + .cwnd_event_tx_start = (void *)cwnd_event_tx_start, 121 115 .ssthresh = (void *)ssthresh, 122 116 .min_tso_segs = (void *)min_tso_segs, 123 117 .set_state = (void *)set_state,