Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'mptcp-expose-more-info-and-small-improvements'

Matthieu Baerts says:

====================
mptcp: expose more info and small improvements

Patch 1-3/9 track and expose some aggregated data counters at the MPTCP
level: the number of retransmissions and the bytes that have been
transferred. The first patch prepares the work by moving where snd_una
is updated for fallback sockets while the last patch adds some tests to
cover the new code.

Patch 4-6/9 introduce a new getsockopt for SOL_MPTCP: MPTCP_FULL_INFO.
This new socket option combines info from the MPTCP_INFO,
MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS socket options into one. Having
all the info in a single call can be needed because the path-manager can
close and re-create subflows between getsockopt() calls, fooling the accounting. The
first patch introduces a unique subflow ID to easily detect when
subflows are being re-created with the same 5-tuple while the last patch
adds some tests to cover the new code.

Please note that patch 5/9 ("mptcp: introduce MPTCP_FULL_INFO getsockopt")
can reveal a bug that has been there for a bit of time, see [1]. A fix has
recently been sent to netdev for the -net tree: "mptcp: ensure listener
is unhashed before updating the sk status", see [2]. There are no
conflicts between the two patches, but it might be better to apply this
series after the one for -net and after having merged "net" into
"net-next".

Patch 7/9 is similar to commit 47867f0a7e83 ("selftests: mptcp: join:
skip check if MIB counter not supported") recently applied in the -net
tree but here it adapts the new code that is only in net-next (and it
fixes a merge conflict resolution which didn't have any impact).

Patches 8 and 9/9 are two simple refactorings. One to consolidate the
transition to TCP_CLOSE in mptcp_do_fastclose() and avoid duplicated
code. The other one reduces the scope of an argument passed to
mptcp_pm_alloc_anno_list() function.

Link: https://github.com/multipath-tcp/mptcp_net-next/issues/407 [1]
Link: https://lore.kernel.org/netdev/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net/ [2]
====================

Link: https://lore.kernel.org/r/20230620-upstream-net-next-20230620-mptcp-expose-more-info-and-misc-v1-0-62b9444bfd48@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+356 -46
+29
include/uapi/linux/mptcp.h
··· 123 123 __u8 mptcpi_local_addr_used; 124 124 __u8 mptcpi_local_addr_max; 125 125 __u8 mptcpi_csum_enabled; 126 + __u32 mptcpi_retransmits; 127 + __u64 mptcpi_bytes_retrans; 128 + __u64 mptcpi_bytes_sent; 129 + __u64 mptcpi_bytes_received; 130 + __u64 mptcpi_bytes_acked; 126 131 }; 127 132 128 133 /* ··· 249 244 }; 250 245 }; 251 246 247 + struct mptcp_subflow_info { 248 + __u32 id; 249 + struct mptcp_subflow_addrs addrs; 250 + }; 251 + 252 + struct mptcp_full_info { 253 + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ 254 + __u32 size_tcpinfo_user; 255 + __u32 size_sfinfo_kernel; /* must be 0, set by kernel */ 256 + __u32 size_sfinfo_user; 257 + __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */ 258 + __u32 size_arrays_user; /* max subflows that userspace is interested in; 259 + * the buffers at subflow_info/tcp_info 260 + * are respectively at least: 261 + * size_arrays * size_sfinfo_user 262 + * size_arrays * size_tcpinfo_user 263 + * bytes wide 264 + */ 265 + __aligned_u64 subflow_info; 266 + __aligned_u64 tcp_info; 267 + struct mptcp_info mptcp_info; 268 + }; 269 + 252 270 /* MPTCP socket options */ 253 271 #define MPTCP_INFO 1 254 272 #define MPTCP_TCPINFO 2 255 273 #define MPTCP_SUBFLOW_ADDRS 3 274 + #define MPTCP_FULL_INFO 4 256 275 257 276 #endif /* _UAPI_MPTCP_H */
+13 -1
net/mptcp/options.c
··· 1026 1026 return cur_seq; 1027 1027 } 1028 1028 1029 + static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) 1030 + { 1031 + msk->bytes_acked += new_snd_una - msk->snd_una; 1032 + msk->snd_una = new_snd_una; 1033 + } 1034 + 1029 1035 static void ack_update_msk(struct mptcp_sock *msk, 1030 1036 struct sock *ssk, 1031 1037 struct mptcp_options_received *mp_opt) ··· 1063 1057 __mptcp_check_push(sk, ssk); 1064 1058 1065 1059 if (after64(new_snd_una, old_snd_una)) { 1066 - msk->snd_una = new_snd_una; 1060 + __mptcp_snd_una_update(msk, new_snd_una); 1067 1061 __mptcp_data_acked(sk); 1068 1062 } 1069 1063 mptcp_data_unlock(sk); ··· 1125 1119 mptcp_data_lock(subflow->conn); 1126 1120 if (sk_stream_memory_free(sk)) 1127 1121 __mptcp_check_push(subflow->conn, sk); 1122 + 1123 + /* on fallback we just need to ignore the msk-level snd_una, as 1124 + * this is really plain TCP 1125 + */ 1126 + __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt)); 1127 + 1128 1128 __mptcp_data_acked(subflow->conn); 1129 1129 mptcp_data_unlock(subflow->conn); 1130 1130 return true;
+4 -4
net/mptcp/pm_netlink.c
··· 341 341 } 342 342 343 343 bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, 344 - const struct mptcp_pm_addr_entry *entry) 344 + const struct mptcp_addr_info *addr) 345 345 { 346 346 struct mptcp_pm_add_entry *add_entry = NULL; 347 347 struct sock *sk = (struct sock *)msk; ··· 349 349 350 350 lockdep_assert_held(&msk->pm.lock); 351 351 352 - add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr); 352 + add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); 353 353 354 354 if (add_entry) { 355 355 if (mptcp_pm_is_kernel(msk)) ··· 366 366 367 367 list_add(&add_entry->list, &msk->pm.anno_list); 368 368 369 - add_entry->addr = entry->addr; 369 + add_entry->addr = *addr; 370 370 add_entry->sock = msk; 371 371 add_entry->retrans_times = 0; 372 372 ··· 576 576 return; 577 577 578 578 if (local) { 579 - if (mptcp_pm_alloc_anno_list(msk, local)) { 579 + if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { 580 580 __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 581 581 msk->pm.add_addr_signaled++; 582 582 mptcp_pm_announce_addr(msk, &local->addr, false);
+1 -1
net/mptcp/pm_userspace.c
··· 193 193 lock_sock((struct sock *)msk); 194 194 spin_lock_bh(&msk->pm.lock); 195 195 196 - if (mptcp_pm_alloc_anno_list(msk, &addr_val)) { 196 + if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { 197 197 msk->pm.add_addr_signaled++; 198 198 mptcp_pm_announce_addr(msk, &addr_val.addr, false); 199 199 mptcp_pm_nl_addr_send_ack(msk);
+19 -12
net/mptcp/protocol.c
··· 96 96 list_add(&subflow->node, &msk->conn_list); 97 97 sock_hold(ssock->sk); 98 98 subflow->request_mptcp = 1; 99 + subflow->subflow_id = msk->subflow_id++; 99 100 100 101 /* This is the first subflow, always with id 0 */ 101 102 subflow->local_id_valid = 1; ··· 378 377 379 378 if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { 380 379 /* in sequence */ 380 + msk->bytes_received += copy_len; 381 381 WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); 382 382 tail = skb_peek_tail(&sk->sk_receive_queue); 383 383 if (tail && mptcp_try_coalesce(sk, tail, skb)) ··· 762 760 MPTCP_SKB_CB(skb)->map_seq += delta; 763 761 __skb_queue_tail(&sk->sk_receive_queue, skb); 764 762 } 763 + msk->bytes_received += end_seq - msk->ack_seq; 765 764 msk->ack_seq = end_seq; 766 765 moved = true; 767 766 } ··· 848 845 if (sk->sk_socket && !ssk->sk_socket) 849 846 mptcp_sock_graft(ssk, sk->sk_socket); 850 847 848 + mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; 851 849 mptcp_sockopt_sync_locked(msk, ssk); 852 850 mptcp_subflow_joined(msk, ssk); 853 851 return true; ··· 1007 1003 struct mptcp_sock *msk = mptcp_sk(sk); 1008 1004 struct mptcp_data_frag *dtmp, *dfrag; 1009 1005 u64 snd_una; 1010 - 1011 - /* on fallback we just need to ignore snd_una, as this is really 1012 - * plain TCP 1013 - */ 1014 - if (__mptcp_check_fallback(msk)) 1015 - msk->snd_una = READ_ONCE(msk->snd_nxt); 1016 1006 1017 1007 snd_una = msk->snd_una; 1018 1008 list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { ··· 1535 1537 * that has been handed to the subflow for transmission 1536 1538 * and skip update in case it was old dfrag. 
1537 1539 */ 1538 - if (likely(after64(snd_nxt_new, msk->snd_nxt))) 1540 + if (likely(after64(snd_nxt_new, msk->snd_nxt))) { 1541 + msk->bytes_sent += snd_nxt_new - msk->snd_nxt; 1539 1542 msk->snd_nxt = snd_nxt_new; 1543 + } 1540 1544 } 1541 1545 1542 1546 void mptcp_check_and_set_pending(struct sock *sk) ··· 2596 2596 } 2597 2597 if (copied) { 2598 2598 dfrag->already_sent = max(dfrag->already_sent, info.sent); 2599 + msk->bytes_retrans += copied; 2599 2600 tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, 2600 2601 info.size_goal); 2601 2602 WRITE_ONCE(msk->allow_infinite_fallback, false); ··· 2655 2654 struct mptcp_subflow_context *subflow, *tmp; 2656 2655 struct mptcp_sock *msk = mptcp_sk(sk); 2657 2656 2657 + inet_sk_state_store(sk, TCP_CLOSE); 2658 2658 mptcp_for_each_subflow_safe(msk, subflow, tmp) 2659 2659 __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), 2660 2660 subflow, MPTCP_CF_FASTCLOSE); ··· 2693 2691 * even if it is orphaned and in FIN_WAIT2 state 2694 2692 */ 2695 2693 if (sock_flag(sk, SOCK_DEAD)) { 2696 - if (mptcp_should_close(sk)) { 2697 - inet_sk_state_store(sk, TCP_CLOSE); 2694 + if (mptcp_should_close(sk)) 2698 2695 mptcp_do_fastclose(sk); 2699 - } 2696 + 2700 2697 if (sk->sk_state == TCP_CLOSE) { 2701 2698 __mptcp_destroy_sock(sk); 2702 2699 goto unlock; ··· 2734 2733 WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); 2735 2734 WRITE_ONCE(msk->allow_infinite_fallback, true); 2736 2735 msk->recovery = false; 2736 + msk->subflow_id = 1; 2737 2737 2738 2738 mptcp_pm_data_init(msk); 2739 2739 ··· 2938 2936 void __mptcp_unaccepted_force_close(struct sock *sk) 2939 2937 { 2940 2938 sock_set_flag(sk, SOCK_DEAD); 2941 - inet_sk_state_store(sk, TCP_CLOSE); 2942 2939 mptcp_do_fastclose(sk); 2943 2940 __mptcp_destroy_sock(sk); 2944 2941 } ··· 2979 2978 /* If the msk has read data, or the caller explicitly ask it, 2980 2979 * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose 2981 2980 */ 2982 - 
inet_sk_state_store(sk, TCP_CLOSE); 2983 2981 mptcp_do_fastclose(sk); 2984 2982 timeout = 0; 2985 2983 } else if (mptcp_close_state(sk)) { ··· 3108 3108 WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); 3109 3109 mptcp_pm_data_reset(msk); 3110 3110 mptcp_ca_reset(sk); 3111 + msk->bytes_acked = 0; 3112 + msk->bytes_received = 0; 3113 + msk->bytes_sent = 0; 3114 + msk->bytes_retrans = 0; 3111 3115 3112 3116 WRITE_ONCE(sk->sk_shutdown, 0); 3113 3117 sk_error_report(sk); ··· 3160 3156 msk->snd_una = msk->write_seq; 3161 3157 msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; 3162 3158 msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; 3159 + 3160 + /* passive msk is created after the first/MPC subflow */ 3161 + msk->subflow_id = 2; 3163 3162 3164 3163 sock_reset_flag(nsk, SOCK_RCU_FREE); 3165 3164 security_inet_csk_clone(nsk, req);
+9 -2
net/mptcp/protocol.h
··· 262 262 u64 local_key; 263 263 u64 remote_key; 264 264 u64 write_seq; 265 + u64 bytes_sent; 265 266 u64 snd_nxt; 267 + u64 bytes_received; 266 268 u64 ack_seq; 267 269 atomic64_t rcv_wnd_sent; 268 270 u64 rcv_data_fin_seq; 271 + u64 bytes_retrans; 269 272 int rmem_fwd_alloc; 270 273 struct sock *last_snd; 271 274 int snd_burst; ··· 277 274 * recovery related fields are under data_lock 278 275 * protection 279 276 */ 277 + u64 bytes_acked; 280 278 u64 snd_una; 281 279 u64 wnd_end; 282 280 unsigned long timer_ival; ··· 323 319 u64 rtt_us; /* last maximum rtt of subflows */ 324 320 } rcvq_space; 325 321 326 - u32 setsockopt_seq; 322 + u32 subflow_id; 323 + u32 setsockopt_seq; 327 324 char ca_name[TCP_CA_NAME_MAX]; 328 325 struct mptcp_sock *dl_next; 329 326 }; ··· 504 499 u8 reset_transient:1; 505 500 u8 reset_reason:4; 506 501 u8 stale_count; 502 + 503 + u32 subflow_id; 507 504 508 505 long delegated_status; 509 506 unsigned long fail_tout; ··· 817 810 struct mptcp_addr_info *rem, 818 811 u8 bkup); 819 812 bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, 820 - const struct mptcp_pm_addr_entry *entry); 813 + const struct mptcp_addr_info *addr); 821 814 void mptcp_pm_free_anno_list(struct mptcp_sock *msk); 822 815 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); 823 816 struct mptcp_pm_add_entry *
+145 -7
net/mptcp/sockopt.c
··· 14 14 #include <net/mptcp.h> 15 15 #include "protocol.h" 16 16 17 - #define MIN_INFO_OPTLEN_SIZE 16 17 + #define MIN_INFO_OPTLEN_SIZE 16 18 + #define MIN_FULL_INFO_OPTLEN_SIZE 40 18 19 19 20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 20 21 { ··· 890 889 891 890 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) 892 891 { 892 + struct sock *sk = (struct sock *)msk; 893 893 u32 flags = 0; 894 + bool slow; 894 895 895 896 memset(info, 0, sizeof(*info)); 896 897 ··· 900 897 info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); 901 898 info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); 902 899 info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); 900 + 901 + if (inet_sk_state_load(sk) == TCP_LISTEN) 902 + return; 903 903 904 904 /* The following limits only make sense for the in-kernel PM */ 905 905 if (mptcp_pm_is_kernel(msk)) { ··· 921 915 if (READ_ONCE(msk->can_ack)) 922 916 flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; 923 917 info->mptcpi_flags = flags; 924 - info->mptcpi_token = READ_ONCE(msk->token); 925 - info->mptcpi_write_seq = READ_ONCE(msk->write_seq); 926 - info->mptcpi_snd_una = READ_ONCE(msk->snd_una); 927 - info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); 928 - info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); 918 + mptcp_data_lock(sk); 919 + info->mptcpi_snd_una = msk->snd_una; 920 + info->mptcpi_rcv_nxt = msk->ack_seq; 921 + info->mptcpi_bytes_acked = msk->bytes_acked; 922 + mptcp_data_unlock(sk); 923 + 924 + slow = lock_sock_fast(sk); 925 + info->mptcpi_csum_enabled = msk->csum_enabled; 926 + info->mptcpi_token = msk->token; 927 + info->mptcpi_write_seq = msk->write_seq; 928 + info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; 929 + info->mptcpi_bytes_sent = msk->bytes_sent; 930 + info->mptcpi_bytes_received = msk->bytes_received; 931 + info->mptcpi_bytes_retrans = msk->bytes_retrans; 932 + unlock_sock_fast(sk, slow); 929 933 } 930 934 
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); 931 935 ··· 982 966 } 983 967 984 968 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 985 - char __user *optval, int __user *optlen) 969 + char __user *optval, 970 + int __user *optlen) 986 971 { 987 972 int len, copylen; 988 973 ··· 1164 1147 return 0; 1165 1148 } 1166 1149 1150 + static int mptcp_get_full_info(struct mptcp_full_info *mfi, 1151 + char __user *optval, 1152 + int __user *optlen) 1153 + { 1154 + int len; 1155 + 1156 + BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != 1157 + MIN_FULL_INFO_OPTLEN_SIZE); 1158 + 1159 + if (get_user(len, optlen)) 1160 + return -EFAULT; 1161 + 1162 + if (len < MIN_FULL_INFO_OPTLEN_SIZE) 1163 + return -EINVAL; 1164 + 1165 + memset(mfi, 0, sizeof(*mfi)); 1166 + if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) 1167 + return -EFAULT; 1168 + 1169 + if (mfi->size_tcpinfo_kernel || 1170 + mfi->size_sfinfo_kernel || 1171 + mfi->num_subflows) 1172 + return -EINVAL; 1173 + 1174 + if (mfi->size_sfinfo_user > INT_MAX || 1175 + mfi->size_tcpinfo_user > INT_MAX) 1176 + return -EINVAL; 1177 + 1178 + return len - MIN_FULL_INFO_OPTLEN_SIZE; 1179 + } 1180 + 1181 + static int mptcp_put_full_info(struct mptcp_full_info *mfi, 1182 + char __user *optval, 1183 + u32 copylen, 1184 + int __user *optlen) 1185 + { 1186 + copylen += MIN_FULL_INFO_OPTLEN_SIZE; 1187 + if (put_user(copylen, optlen)) 1188 + return -EFAULT; 1189 + 1190 + if (copy_to_user(optval, mfi, copylen)) 1191 + return -EFAULT; 1192 + return 0; 1193 + } 1194 + 1195 + static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, 1196 + int __user *optlen) 1197 + { 1198 + unsigned int sfcount = 0, copylen = 0; 1199 + struct mptcp_subflow_context *subflow; 1200 + struct sock *sk = (struct sock *)msk; 1201 + void __user *tcpinfoptr, *sfinfoptr; 1202 + struct mptcp_full_info mfi; 1203 + int len; 1204 + 1205 + len = mptcp_get_full_info(&mfi, optval, optlen); 1206 + if (len < 0) 1207 + 
return len; 1208 + 1209 + /* don't bother filling the mptcp info if there is not enough 1210 + * user-space-provided storage 1211 + */ 1212 + if (len > 0) { 1213 + mptcp_diag_fill_info(msk, &mfi.mptcp_info); 1214 + copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); 1215 + } 1216 + 1217 + mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); 1218 + mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, 1219 + sizeof(struct tcp_info)); 1220 + sfinfoptr = u64_to_user_ptr(mfi.subflow_info); 1221 + mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); 1222 + mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, 1223 + sizeof(struct mptcp_subflow_info)); 1224 + tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); 1225 + 1226 + lock_sock(sk); 1227 + mptcp_for_each_subflow(msk, subflow) { 1228 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1229 + struct mptcp_subflow_info sfinfo; 1230 + struct tcp_info tcp_info; 1231 + 1232 + if (sfcount++ >= mfi.size_arrays_user) 1233 + continue; 1234 + 1235 + /* fetch addr/tcp_info only if the user space buffers 1236 + * are wide enough 1237 + */ 1238 + memset(&sfinfo, 0, sizeof(sfinfo)); 1239 + sfinfo.id = subflow->subflow_id; 1240 + if (mfi.size_sfinfo_user > 1241 + offsetof(struct mptcp_subflow_info, addrs)) 1242 + mptcp_get_sub_addrs(ssk, &sfinfo.addrs); 1243 + if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) 1244 + goto fail_release; 1245 + 1246 + if (mfi.size_tcpinfo_user) { 1247 + tcp_get_info(ssk, &tcp_info); 1248 + if (copy_to_user(tcpinfoptr, &tcp_info, 1249 + mfi.size_tcpinfo_user)) 1250 + goto fail_release; 1251 + } 1252 + 1253 + tcpinfoptr += mfi.size_tcpinfo_user; 1254 + sfinfoptr += mfi.size_sfinfo_user; 1255 + } 1256 + release_sock(sk); 1257 + 1258 + mfi.num_subflows = sfcount; 1259 + if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) 1260 + return -EFAULT; 1261 + 1262 + return 0; 1263 + 1264 + fail_release: 1265 + release_sock(sk); 1266 + return -EFAULT; 1267 + } 1268 
+ 1167 1269 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, 1168 1270 int __user *optlen, int val) 1169 1271 { ··· 1356 1220 switch (optname) { 1357 1221 case MPTCP_INFO: 1358 1222 return mptcp_getsockopt_info(msk, optval, optlen); 1223 + case MPTCP_FULL_INFO: 1224 + return mptcp_getsockopt_full_info(msk, optval, optlen); 1359 1225 case MPTCP_TCPINFO: 1360 1226 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 1361 1227 case MPTCP_SUBFLOW_ADDRS:
+2
net/mptcp/subflow.c
··· 819 819 if (!ctx->conn) 820 820 goto fallback; 821 821 822 + ctx->subflow_id = 1; 822 823 owner = mptcp_sk(ctx->conn); 823 824 mptcp_pm_new_connection(owner, child, 1); 824 825 ··· 1575 1574 subflow->remote_id = remote_id; 1576 1575 subflow->request_join = 1; 1577 1576 subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); 1577 + subflow->subflow_id = msk->subflow_id++; 1578 1578 mptcp_info2sockaddr(remote, &addr, ssk->sk_family); 1579 1579 1580 1580 sock_hold(ssk);
+17 -16
tools/testing/selftests/net/mptcp/mptcp_join.sh
··· 1683 1683 timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) 1684 1684 1685 1685 printf "%-${nr_blank}s %s" " " "add TX" 1686 - count=$(ip netns exec $ns1 nstat -as MPTcpExtAddAddrTx | grep MPTcpExtAddAddrTx | awk '{print $2}') 1687 - [ -z "$count" ] && count=0 1688 - 1686 + count=$(get_counter ${ns1} "MPTcpExtAddAddrTx") 1687 + if [ -z "$count" ]; then 1688 + echo -n "[skip]" 1689 1689 # if the test configured a short timeout tolerate greater then expected 1690 1690 # add addrs options, due to retransmissions 1691 - if [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then 1691 + elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then 1692 1692 echo "[fail] got $count ADD_ADDR[s] TX, expected $add_tx_nr" 1693 1693 fail_test 1694 1694 else ··· 1696 1696 fi 1697 1697 1698 1698 echo -n " - echo TX " 1699 - count=$(ip netns exec $ns2 nstat -as MPTcpExtEchoAddTx | grep MPTcpExtEchoAddTx | awk '{print $2}') 1700 - [ -z "$count" ] && count=0 1701 - if [ "$count" != "$echo_tx_nr" ]; then 1699 + count=$(get_counter ${ns2} "MPTcpExtEchoAddTx") 1700 + if [ -z "$count" ]; then 1701 + echo "[skip]" 1702 + elif [ "$count" != "$echo_tx_nr" ]; then 1702 1703 echo "[fail] got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr" 1703 1704 fail_test 1704 1705 else ··· 1735 1734 fi 1736 1735 1737 1736 printf "%-${nr_blank}s %s" " " "rm " 1738 - count=$(ip netns exec $addr_ns nstat -as MPTcpExtRmAddr | grep MPTcpExtRmAddr | awk '{print $2}') 1739 - [ -z "$count" ] && count=0 1740 - if [ "$count" != "$rm_addr_nr" ]; then 1737 + count=$(get_counter ${addr_ns} "MPTcpExtRmAddr") 1738 + if [ -z "$count" ]; then 1739 + echo -n "[skip]" 1740 + elif [ "$count" != "$rm_addr_nr" ]; then 1741 1741 echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" 1742 1742 fail_test 1743 1743 else ··· 1780 1778 local rm_addr_tx_nr=$1 1781 1779 1782 1780 printf "%-${nr_blank}s %s" " " "rm TX " 1783 - 
count=$(ip netns exec $ns2 nstat -as MPTcpExtRmAddrTx | grep MPTcpExtRmAddrTx | awk '{print $2}') 1784 - [ -z "$count" ] && count=0 1785 - if [ "$count" != "$rm_addr_tx_nr" ]; then 1781 + count=$(get_counter ${ns2} "MPTcpExtRmAddrTx") 1782 + if [ -z "$count" ]; then 1783 + echo "[skip]" 1784 + elif [ "$count" != "$rm_addr_tx_nr" ]; then 1786 1785 echo "[fail] got $count RM_ADDR[s] expected $rm_addr_tx_nr" 1787 1786 fail_test 1788 1787 else 1789 - echo -n "[ ok ]" 1788 + echo "[ ok ]" 1790 1789 fi 1791 - 1792 - echo "$extra_msg" 1793 1790 } 1794 1791 1795 1792 chk_prio_nr()
+117 -3
tools/testing/selftests/net/mptcp/mptcp_sockopt.c
··· 51 51 __u8 mptcpi_local_addr_used; 52 52 __u8 mptcpi_local_addr_max; 53 53 __u8 mptcpi_csum_enabled; 54 + __u32 mptcpi_retransmits; 55 + __u64 mptcpi_bytes_retrans; 56 + __u64 mptcpi_bytes_sent; 57 + __u64 mptcpi_bytes_received; 58 + __u64 mptcpi_bytes_acked; 54 59 }; 55 60 56 61 struct mptcp_subflow_data { ··· 86 81 #define MPTCP_SUBFLOW_ADDRS 3 87 82 #endif 88 83 84 + #ifndef MPTCP_FULL_INFO 85 + struct mptcp_subflow_info { 86 + __u32 id; 87 + struct mptcp_subflow_addrs addrs; 88 + }; 89 + 90 + struct mptcp_full_info { 91 + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ 92 + __u32 size_tcpinfo_user; 93 + __u32 size_sfinfo_kernel; /* must be 0, set by kernel */ 94 + __u32 size_sfinfo_user; 95 + __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */ 96 + __u32 size_arrays_user; /* max subflows that userspace is interested in; 97 + * the buffers at subflow_info/tcp_info 98 + * are respectively at least: 99 + * size_arrays * size_sfinfo_user 100 + * size_arrays * size_tcpinfo_user 101 + * bytes wide 102 + */ 103 + __aligned_u64 subflow_info; 104 + __aligned_u64 tcp_info; 105 + struct mptcp_info mptcp_info; 106 + }; 107 + 108 + #define MPTCP_FULL_INFO 4 109 + #endif 110 + 89 111 struct so_state { 90 112 struct mptcp_info mi; 113 + struct mptcp_info last_sample; 114 + struct tcp_info tcp_info; 115 + struct mptcp_subflow_addrs addrs; 91 116 uint64_t mptcpi_rcv_delta; 92 117 uint64_t tcpi_rcv_delta; 118 + bool pkt_stats_avail; 93 119 }; 94 120 95 121 #ifndef MIN ··· 358 322 if (ret < 0) 359 323 die_perror("getsockopt MPTCP_INFO"); 360 324 361 - assert(olen == sizeof(i)); 325 + s->pkt_stats_avail = olen >= sizeof(i); 362 326 327 + s->last_sample = i; 363 328 if (s->mi.mptcpi_write_seq == 0) 364 329 s->mi = i; 365 330 ··· 399 362 olen -= sizeof(struct mptcp_subflow_data); 400 363 assert(olen == ti.d.size_user); 401 364 365 + s->tcp_info = ti.ti[0]; 366 + 402 367 if (ti.ti[0].tcpi_bytes_sent == w && 403 368 ti.ti[0].tcpi_bytes_received == r) 
404 369 goto done; ··· 422 383 do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO); 423 384 } 424 385 425 - static void do_getsockopt_subflow_addrs(int fd) 386 + static void do_getsockopt_subflow_addrs(struct so_state *s, int fd) 426 387 { 427 388 struct sockaddr_storage remote, local; 428 389 socklen_t olen, rlen, llen; ··· 470 431 471 432 assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0); 472 433 assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0); 434 + s->addrs = addrs.addr[0]; 473 435 474 436 memset(&addrs, 0, sizeof(addrs)); 475 437 ··· 491 451 do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS); 492 452 } 493 453 454 + static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd) 455 + { 456 + size_t data_size = sizeof(struct mptcp_full_info); 457 + struct mptcp_subflow_info sfinfo[2]; 458 + struct tcp_info tcp_info[2]; 459 + struct mptcp_full_info mfi; 460 + socklen_t olen; 461 + int ret; 462 + 463 + memset(&mfi, 0, data_size); 464 + memset(tcp_info, 0, sizeof(tcp_info)); 465 + memset(sfinfo, 0, sizeof(sfinfo)); 466 + 467 + mfi.size_tcpinfo_user = sizeof(struct tcp_info); 468 + mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info); 469 + mfi.size_arrays_user = 2; 470 + mfi.subflow_info = (unsigned long)&sfinfo[0]; 471 + mfi.tcp_info = (unsigned long)&tcp_info[0]; 472 + olen = data_size; 473 + 474 + ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen); 475 + if (ret < 0) { 476 + if (errno == EOPNOTSUPP) { 477 + perror("MPTCP_FULL_INFO test skipped"); 478 + return; 479 + } 480 + xerror("getsockopt MPTCP_FULL_INFO"); 481 + } 482 + 483 + assert(olen <= data_size); 484 + assert(mfi.size_tcpinfo_kernel > 0); 485 + assert(mfi.size_tcpinfo_user == 486 + MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info))); 487 + assert(mfi.size_sfinfo_kernel > 0); 488 + assert(mfi.size_sfinfo_user == 489 + MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info))); 490 + assert(mfi.num_subflows == 1); 491 + 492 + /* 
Tolerate future extension to mptcp_info struct and running newer 493 + * test on top of older kernel. 494 + * Anyway any kernel supporting MPTCP_FULL_INFO must at least include 495 + * the following in mptcp_info. 496 + */ 497 + assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info)); 498 + assert(mfi.mptcp_info.mptcpi_subflows == 0); 499 + assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent); 500 + assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received); 501 + 502 + assert(sfinfo[0].id == 1); 503 + assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent); 504 + assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received); 505 + assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs))); 506 + } 507 + 494 508 static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w) 495 509 { 496 510 do_getsockopt_mptcp_info(s, fd, w); 497 511 498 512 do_getsockopt_tcp_info(s, fd, r, w); 499 513 500 - do_getsockopt_subflow_addrs(fd); 514 + do_getsockopt_subflow_addrs(s, fd); 515 + 516 + if (r) 517 + do_getsockopt_mptcp_full_info(s, fd); 501 518 } 502 519 503 520 static void connect_one_server(int fd, int pipefd) ··· 659 562 do_getsockopts(&s, fd, ret, ret2); 660 563 if (s.mptcpi_rcv_delta != (uint64_t)ret + 1) 661 564 xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret); 565 + 566 + /* be nice when running on top of older kernel */ 567 + if (s.pkt_stats_avail) { 568 + if (s.last_sample.mptcpi_bytes_sent != ret2) 569 + xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64, 570 + s.last_sample.mptcpi_bytes_sent, ret2, 571 + s.last_sample.mptcpi_bytes_sent - ret2); 572 + if (s.last_sample.mptcpi_bytes_received != ret) 573 + xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64, 574 + s.last_sample.mptcpi_bytes_received, ret, 575 + s.last_sample.mptcpi_bytes_received - ret); 576 + if 
(s.last_sample.mptcpi_bytes_acked != ret) 577 + xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64, 578 + s.last_sample.mptcpi_bytes_acked, ret2, 579 + s.last_sample.mptcpi_bytes_acked - ret2); 580 + } 581 + 662 582 close(fd); 663 583 } 664 584