Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'mptcp-make-add_addr-retransmission-timeout-adaptive'

Matthieu Baerts says:

====================
mptcp: make ADD_ADDR retransmission timeout adaptive

Currently, MPTCP ADD_ADDR notifications are retransmitted after a fixed
timeout controlled by the net.mptcp.add_addr_timeout sysctl knob if the
corresponding "echo" packets have not been received by then. This can be
too slow (or too quick), especially with the overly cautious default
value of 2 minutes.

- Patch 1: make the ADD_ADDR retransmission timeout adaptive, using
TCP's retransmission timeout. The corresponding sysctl knob is now
used as a maximum value.

- Patch 2: now that these ADD_ADDR retransmissions can happen faster,
all MPTCP Join subtests checking ADD_ADDR counters accept more
ADD_ADDRs than expected (when at least one is expected). This is
aligned with the previous behaviour when the ADD_ADDR timeout was
lowered to 1 second.

- Patch 3: some CIs have reported that some MPTCP Join signalling tests
were unstable. This seems to be due to the time it can take in slow
environments to send a series of ADD_ADDR notifications and wait each
time for their echo reply. Use a longer transfer to avoid such errors.

v1: https://lore.kernel.org/d5397026-92eb-4a43-9534-954b43ab9305@kernel.org
====================

Link: https://patch.msgid.link/20250907-net-next-mptcp-add_addr-retrans-adapt-v1-0-824cc805772b@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+40 -21
+5 -3
Documentation/networking/mptcp-sysctl.rst
··· 8 8 =============================== 9 9 10 10 add_addr_timeout - INTEGER (seconds) 11 - Set the timeout after which an ADD_ADDR control message will be 12 - resent to an MPTCP peer that has not acknowledged a previous 13 - ADD_ADDR message. 11 + Set the maximum value of timeout after which an ADD_ADDR control message 12 + will be resent to an MPTCP peer that has not acknowledged a previous 13 + ADD_ADDR message. A dynamically estimated retransmission timeout based 14 + on the estimated connection round-trip-time is used if this value is 15 + lower than the maximum one. 14 16 15 17 Do not retransmit if set to 0. 16 18
+24 -4
net/mptcp/pm.c
··· 268 268 return -EINVAL; 269 269 } 270 270 271 + static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk) 272 + { 273 + const struct net *net = sock_net((struct sock *)msk); 274 + unsigned int rto = mptcp_get_add_addr_timeout(net); 275 + struct mptcp_subflow_context *subflow; 276 + unsigned int max = 0; 277 + 278 + mptcp_for_each_subflow(msk, subflow) { 279 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 280 + struct inet_connection_sock *icsk = inet_csk(ssk); 281 + 282 + if (icsk->icsk_rto > max) 283 + max = icsk->icsk_rto; 284 + } 285 + 286 + if (max && max < rto) 287 + rto = max; 288 + 289 + return rto; 290 + } 291 + 271 292 static void mptcp_pm_add_timer(struct timer_list *timer) 272 293 { 273 294 struct mptcp_pm_add_entry *entry = timer_container_of(entry, timer, ··· 313 292 goto out; 314 293 } 315 294 316 - timeout = mptcp_get_add_addr_timeout(sock_net(sk)); 295 + timeout = mptcp_adjust_add_addr_timeout(msk); 317 296 if (!timeout) 318 297 goto out; 319 298 ··· 328 307 329 308 if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) 330 309 sk_reset_timer(sk, timer, 331 - jiffies + timeout); 310 + jiffies + (timeout << entry->retrans_times)); 332 311 333 312 spin_unlock_bh(&msk->pm.lock); 334 313 ··· 369 348 { 370 349 struct mptcp_pm_add_entry *add_entry = NULL; 371 350 struct sock *sk = (struct sock *)msk; 372 - struct net *net = sock_net(sk); 373 351 unsigned int timeout; 374 352 375 353 lockdep_assert_held(&msk->pm.lock); ··· 394 374 395 375 timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); 396 376 reset_timer: 397 - timeout = mptcp_get_add_addr_timeout(net); 377 + timeout = mptcp_adjust_add_addr_timeout(msk); 398 378 if (timeout) 399 379 sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); 400 380
+11 -14
tools/testing/selftests/net/mptcp/mptcp_join.sh
··· 358 358 tables="${ip6tables}" 359 359 fi 360 360 361 + # set a maximum, to avoid too long timeout with exponential backoff 361 362 ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 362 363 363 364 if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \ ··· 1670 1669 local tx="" 1671 1670 local rx="" 1672 1671 local count 1673 - local timeout 1674 1672 1675 1673 if [[ $ns_invert = "invert" ]]; then 1676 1674 ns_tx=$ns2 ··· 1678 1678 rx=" server" 1679 1679 fi 1680 1680 1681 - timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) 1682 - 1683 1681 print_check "add addr rx${rx}" 1684 1682 count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") 1685 1683 if [ -z "$count" ]; then 1686 1684 print_skip 1687 - # if the test configured a short timeout tolerate greater then expected 1688 - # add addrs options, due to retransmissions 1689 - elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then 1685 + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions 1686 + elif [ "$count" != "$add_nr" ] && 1687 + { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then 1690 1688 fail_test "got $count ADD_ADDR[s] expected $add_nr" 1691 1689 else 1692 1690 print_ok ··· 1772 1774 { 1773 1775 local add_tx_nr=$1 1774 1776 local echo_tx_nr=$2 1775 - local timeout 1776 1777 local count 1777 - 1778 - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) 1779 1778 1780 1779 print_check "add addr tx" 1781 1780 count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") 1782 1781 if [ -z "$count" ]; then 1783 1782 print_skip 1784 - # if the test configured a short timeout tolerate greater then expected 1785 - # add addrs options, due to retransmissions 1786 - elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then 1783 + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions 1784 + elif [ "$count" != "$add_tx_nr" ] && 1785 + { [ 
"$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then 1787 1786 fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr" 1788 1787 else 1789 1788 print_ok ··· 2268 2273 pm_nl_add_endpoint $ns1 10.0.3.1 flags signal 2269 2274 pm_nl_add_endpoint $ns1 10.0.4.1 flags signal 2270 2275 pm_nl_set_limits $ns2 3 3 2271 - run_tests $ns1 $ns2 10.0.1.1 2276 + speed=slow \ 2277 + run_tests $ns1 $ns2 10.0.1.1 2272 2278 chk_join_nr 3 3 3 2273 2279 chk_add_nr 3 3 2274 2280 fi ··· 2281 2285 pm_nl_add_endpoint $ns1 10.0.3.1 flags signal 2282 2286 pm_nl_add_endpoint $ns1 10.0.14.1 flags signal 2283 2287 pm_nl_set_limits $ns2 3 3 2284 - run_tests $ns1 $ns2 10.0.1.1 2288 + speed=slow \ 2289 + run_tests $ns1 $ns2 10.0.1.1 2285 2290 join_syn_tx=3 \ 2286 2291 chk_join_nr 1 1 1 2287 2292 chk_add_nr 3 3