Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'mptcp-misc-features-for-v6-18'

Matthieu Baerts says:

====================
mptcp: misc. features for v6.18

This series contains 3 independent new features, spread over 4 patches:

- Patch 1: use HMAC-SHA256 library instead of open-coded HMAC.

- Patch 2: selftests: check for unexpected fallback counter increments.

- Patches 3-4: record subflows in RPS table, for aRFS support.

v1: https://lore.kernel.org/20250901-net-next-mptcp-misc-feat-6-18-v1-0-80ae80d2b903@kernel.org
====================

Link: https://patch.msgid.link/20250902-net-next-mptcp-misc-feat-6-18-v2-0-fa02bb3188b1@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+202 -62
+56 -29
include/net/rps.h
··· 85 85 WRITE_ONCE(table->ents[index], val); 86 86 } 87 87 88 - #endif /* CONFIG_RPS */ 89 - 90 - static inline void sock_rps_record_flow_hash(__u32 hash) 88 + static inline void _sock_rps_record_flow_hash(__u32 hash) 91 89 { 92 - #ifdef CONFIG_RPS 93 90 struct rps_sock_flow_table *sock_flow_table; 94 91 95 92 if (!hash) ··· 96 99 if (sock_flow_table) 97 100 rps_record_sock_flow(sock_flow_table, hash); 98 101 rcu_read_unlock(); 99 - #endif 100 102 } 101 103 102 - static inline void sock_rps_record_flow(const struct sock *sk) 104 + static inline void _sock_rps_record_flow(const struct sock *sk) 103 105 { 104 - #ifdef CONFIG_RPS 105 - if (static_branch_unlikely(&rfs_needed)) { 106 - /* Reading sk->sk_rxhash might incur an expensive cache line 107 - * miss. 108 - * 109 - * TCP_ESTABLISHED does cover almost all states where RFS 110 - * might be useful, and is cheaper [1] than testing : 111 - * IPv4: inet_sk(sk)->inet_daddr 112 - * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) 113 - * OR an additional socket flag 114 - * [1] : sk_state and sk_prot are in the same cache line. 106 + /* Reading sk->sk_rxhash might incur an expensive cache line 107 + * miss. 108 + * 109 + * TCP_ESTABLISHED does cover almost all states where RFS 110 + * might be useful, and is cheaper [1] than testing : 111 + * IPv4: inet_sk(sk)->inet_daddr 112 + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) 113 + * OR an additional socket flag 114 + * [1] : sk_state and sk_prot are in the same cache line. 115 + */ 116 + if (sk->sk_state == TCP_ESTABLISHED) { 117 + /* This READ_ONCE() is paired with the WRITE_ONCE() 118 + * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). 115 119 */ 116 - if (sk->sk_state == TCP_ESTABLISHED) { 117 - /* This READ_ONCE() is paired with the WRITE_ONCE() 118 - * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). 
119 - */ 120 - sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); 121 - } 120 + _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); 122 121 } 123 - #endif 124 122 } 125 123 126 - static inline void sock_rps_delete_flow(const struct sock *sk) 124 + static inline void _sock_rps_delete_flow(const struct sock *sk) 127 125 { 128 - #ifdef CONFIG_RPS 129 126 struct rps_sock_flow_table *table; 130 127 u32 hash, index; 131 - 132 - if (!static_branch_unlikely(&rfs_needed)) 133 - return; 134 128 135 129 hash = READ_ONCE(sk->sk_rxhash); 136 130 if (!hash) ··· 135 147 WRITE_ONCE(table->ents[index], RPS_NO_CPU); 136 148 } 137 149 rcu_read_unlock(); 150 + } 151 + #endif /* CONFIG_RPS */ 152 + 153 + static inline bool rfs_is_needed(void) 154 + { 155 + #ifdef CONFIG_RPS 156 + return static_branch_unlikely(&rfs_needed); 157 + #else 158 + return false; 159 + #endif 160 + } 161 + 162 + static inline void sock_rps_record_flow_hash(__u32 hash) 163 + { 164 + #ifdef CONFIG_RPS 165 + if (!rfs_is_needed()) 166 + return; 167 + 168 + _sock_rps_record_flow_hash(hash); 169 + #endif 170 + } 171 + 172 + static inline void sock_rps_record_flow(const struct sock *sk) 173 + { 174 + #ifdef CONFIG_RPS 175 + if (!rfs_is_needed()) 176 + return; 177 + 178 + _sock_rps_record_flow(sk); 179 + #endif 180 + } 181 + 182 + static inline void sock_rps_delete_flow(const struct sock *sk) 183 + { 184 + #ifdef CONFIG_RPS 185 + if (!rfs_is_needed()) 186 + return; 187 + 188 + _sock_rps_delete_flow(sk); 138 189 #endif 139 190 } 140 191
+2 -33
net/mptcp/crypto.c
··· 22 22 23 23 #include <linux/kernel.h> 24 24 #include <crypto/sha2.h> 25 - #include <linux/unaligned.h> 26 25 27 26 #include "protocol.h" 28 27 ··· 42 43 43 44 void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) 44 45 { 45 - u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE]; 46 - u8 key1be[8]; 47 - u8 key2be[8]; 48 - int i; 46 + __be64 key[2] = { cpu_to_be64(key1), cpu_to_be64(key2) }; 49 47 50 - if (WARN_ON_ONCE(len > SHA256_DIGEST_SIZE)) 51 - len = SHA256_DIGEST_SIZE; 52 - 53 - put_unaligned_be64(key1, key1be); 54 - put_unaligned_be64(key2, key2be); 55 - 56 - /* Generate key xored with ipad */ 57 - memset(input, 0x36, SHA256_BLOCK_SIZE); 58 - for (i = 0; i < 8; i++) 59 - input[i] ^= key1be[i]; 60 - for (i = 0; i < 8; i++) 61 - input[i + 8] ^= key2be[i]; 62 - 63 - memcpy(&input[SHA256_BLOCK_SIZE], msg, len); 64 - 65 - /* emit sha256(K1 || msg) on the second input block, so we can 66 - * reuse 'input' for the last hashing 67 - */ 68 - sha256(input, SHA256_BLOCK_SIZE + len, &input[SHA256_BLOCK_SIZE]); 69 - 70 - /* Prepare second part of hmac */ 71 - memset(input, 0x5C, SHA256_BLOCK_SIZE); 72 - for (i = 0; i < 8; i++) 73 - input[i] ^= key1be[i]; 74 - for (i = 0; i < 8; i++) 75 - input[i + 8] ^= key2be[i]; 76 - 77 - sha256(input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE, hmac); 48 + hmac_sha256_usingrawkey((const u8 *)key, sizeof(key), msg, len, hmac); 78 49 } 79 50 80 51 #if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST)
+21
net/mptcp/protocol.c
··· 12 12 #include <linux/sched/signal.h> 13 13 #include <linux/atomic.h> 14 14 #include <net/aligned_data.h> 15 + #include <net/rps.h> 15 16 #include <net/sock.h> 16 17 #include <net/inet_common.h> 17 18 #include <net/inet_hashtables.h> ··· 1741 1740 return limit - not_sent; 1742 1741 } 1743 1742 1743 + static void mptcp_rps_record_subflows(const struct mptcp_sock *msk) 1744 + { 1745 + struct mptcp_subflow_context *subflow; 1746 + 1747 + if (!rfs_is_needed()) 1748 + return; 1749 + 1750 + mptcp_for_each_subflow(msk, subflow) { 1751 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1752 + 1753 + sock_rps_record_flow(ssk); 1754 + } 1755 + } 1756 + 1744 1757 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 1745 1758 { 1746 1759 struct mptcp_sock *msk = mptcp_sk(sk); ··· 1767 1752 msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN; 1768 1753 1769 1754 lock_sock(sk); 1755 + 1756 + mptcp_rps_record_subflows(msk); 1770 1757 1771 1758 if (unlikely(inet_test_bit(DEFER_CONNECT, sk) || 1772 1759 msg->msg_flags & MSG_FASTOPEN)) { ··· 2147 2130 copied = -ENOTCONN; 2148 2131 goto out_err; 2149 2132 } 2133 + 2134 + mptcp_rps_record_subflows(msk); 2150 2135 2151 2136 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2152 2137 ··· 3940 3921 if (!ssk->sk_socket) 3941 3922 mptcp_sock_graft(ssk, newsock); 3942 3923 } 3924 + 3925 + mptcp_rps_record_subflows(msk); 3943 3926 3944 3927 /* Do late cleanup for the first subflow as necessary. Also 3945 3928 * deal with bad peers not doing a complete shutdown.
+123
tools/testing/selftests/net/mptcp/mptcp_join.sh
··· 74 74 unset join_bind_err 75 75 unset join_connect_err 76 76 77 + unset fb_ns1 78 + unset fb_ns2 79 + unset fb_infinite_map_tx 80 + unset fb_dss_corruption 81 + unset fb_simult_conn 82 + unset fb_mpc_passive 83 + unset fb_mpc_active 84 + unset fb_mpc_data 85 + unset fb_md5_sig 86 + unset fb_dss 87 + 77 88 # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || 78 89 # (ip6 && (ip6[74] & 0xf0) == 0x30)'" 79 90 CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, ··· 1410 1399 print_results "join Tx" ${rc} 1411 1400 } 1412 1401 1402 + chk_fallback_nr() 1403 + { 1404 + local infinite_map_tx=${fb_infinite_map_tx:-0} 1405 + local dss_corruption=${fb_dss_corruption:-0} 1406 + local simult_conn=${fb_simult_conn:-0} 1407 + local mpc_passive=${fb_mpc_passive:-0} 1408 + local mpc_active=${fb_mpc_active:-0} 1409 + local mpc_data=${fb_mpc_data:-0} 1410 + local md5_sig=${fb_md5_sig:-0} 1411 + local dss=${fb_dss:-0} 1412 + local rc=${KSFT_PASS} 1413 + local ns=$1 1414 + local count 1415 + 1416 + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx") 1417 + if [ -z "$count" ]; then 1418 + rc=${KSFT_SKIP} 1419 + elif [ "$count" != "$infinite_map_tx" ]; then 1420 + rc=${KSFT_FAIL} 1421 + print_check "$ns infinite map tx fallback" 1422 + fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx" 1423 + fi 1424 + 1425 + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback") 1426 + if [ -z "$count" ]; then 1427 + rc=${KSFT_SKIP} 1428 + elif [ "$count" != "$dss_corruption" ]; then 1429 + rc=${KSFT_FAIL} 1430 + print_check "$ns dss corruption fallback" 1431 + fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption" 1432 + fi 1433 + 1434 + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback") 1435 + if [ -z "$count" ]; then 1436 + rc=${KSFT_SKIP} 1437 + elif [ "$count" != "$simult_conn" ]; then 1438 + rc=${KSFT_FAIL} 1439 + print_check "$ns simult conn fallback" 1440 + fail_test "got $count simult conn 
fallback[s] in $ns expected $simult_conn" 1441 + fi 1442 + 1443 + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK") 1444 + if [ -z "$count" ]; then 1445 + rc=${KSFT_SKIP} 1446 + elif [ "$count" != "$mpc_passive" ]; then 1447 + rc=${KSFT_FAIL} 1448 + print_check "$ns mpc passive fallback" 1449 + fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive" 1450 + fi 1451 + 1452 + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK") 1453 + if [ -z "$count" ]; then 1454 + rc=${KSFT_SKIP} 1455 + elif [ "$count" != "$mpc_active" ]; then 1456 + rc=${KSFT_FAIL} 1457 + print_check "$ns mpc active fallback" 1458 + fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active" 1459 + fi 1460 + 1461 + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback") 1462 + if [ -z "$count" ]; then 1463 + rc=${KSFT_SKIP} 1464 + elif [ "$count" != "$mpc_data" ]; then 1465 + rc=${KSFT_FAIL} 1466 + print_check "$ns mpc data fallback" 1467 + fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data" 1468 + fi 1469 + 1470 + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback") 1471 + if [ -z "$count" ]; then 1472 + rc=${KSFT_SKIP} 1473 + elif [ "$count" != "$md5_sig" ]; then 1474 + rc=${KSFT_FAIL} 1475 + print_check "$ns MD5 Sig fallback" 1476 + fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig" 1477 + fi 1478 + 1479 + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback") 1480 + if [ -z "$count" ]; then 1481 + rc=${KSFT_SKIP} 1482 + elif [ "$count" != "$dss" ]; then 1483 + rc=${KSFT_FAIL} 1484 + print_check "$ns dss fallback" 1485 + fail_test "got $count dss fallback[s] in $ns expected $dss" 1486 + fi 1487 + 1488 + return $rc 1489 + } 1490 + 1491 + chk_fallback_nr_all() 1492 + { 1493 + local netns=("ns1" "ns2") 1494 + local fb_ns=("fb_ns1" "fb_ns2") 1495 + local rc=${KSFT_PASS} 1496 + 1497 + for i in 0 1; do 1498 + if [ -n "${!fb_ns[i]}" ]; then 1499 + eval "${!fb_ns[i]}" \ 
1500 + chk_fallback_nr ${netns[i]} || rc=${?} 1501 + else 1502 + chk_fallback_nr ${netns[i]} || rc=${?} 1503 + fi 1504 + done 1505 + 1506 + if [ "${rc}" != "${KSFT_PASS}" ]; then 1507 + print_results "fallback" ${rc} 1508 + fi 1509 + } 1510 + 1413 1511 chk_join_nr() 1414 1512 { 1415 1513 local syn_nr=$1 ··· 1603 1483 1604 1484 join_syn_tx="${join_syn_tx:-${syn_nr}}" \ 1605 1485 chk_join_tx_nr 1486 + 1487 + chk_fallback_nr_all 1606 1488 1607 1489 if $validate_checksum; then 1608 1490 chk_csum_nr $csum_ns1 $csum_ns2 ··· 3459 3337 join_csum_ns1=+1 join_csum_ns2=+0 \ 3460 3338 join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ 3461 3339 join_corrupted_pkts="$(pedit_action_pkts)" \ 3340 + fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \ 3462 3341 chk_join_nr 0 0 0 3463 3342 chk_fail_nr 1 -1 invert 3464 3343 fi