Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'mptcp-pm-prep-work-for-new-ops-and-sysctl-knobs'

Matthieu Baerts says:

====================
mptcp: pm: prep work for new ops and sysctl knobs

Here are a few cleanups, some preparation work for the new PM ops, and
new sysctl knobs.

- Patch 1: reorg: move generic NL code used by all PMs to pm_netlink.c.

- Patch 2: use kmemdup() instead of kmalloc + copy.

- Patch 3: small cleanup to use pm var instead of msk->pm.

- Patch 4: reorg: id_avail_bitmap is only used by the in-kernel PM.

- Patch 5: use struct_group to easily reset a subset of PM data vars.

- Patch 6: introduce the minimal skeleton for the new PM ops.

- Patch 7: register in-kernel and userspace PM ops.

- Patch 8: new net.mptcp.path_manager sysctl knob, deprecating pm_type.

- Patch 9: map the new path_manager sysctl knob to pm_type.

- Patch 10: map the old pm_type sysctl knob to path_manager.

- Patch 11: new net.mptcp.available_path_managers sysctl knob.

- Patch 12: new test to validate path_manager and pm_type mapping.

Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
====================

Link: https://patch.msgid.link/20250313-net-next-mptcp-pm-ops-intro-v1-0-f4e4a88efc50@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+301 -25
+23
Documentation/networking/mptcp-sysctl.rst
··· 30 30 31 31 Default: 1 32 32 33 + available_path_managers - STRING 34 + Shows the available path managers choices that are registered. More 35 + path managers may be available, but not loaded. 36 + 33 37 available_schedulers - STRING 34 38 Shows the available schedulers choices that are registered. More packet 35 39 schedulers may be available, but not loaded. ··· 76 72 77 73 Default: 1 (enabled) 78 74 75 + path_manager - STRING 76 + Set the default path manager name to use for each new MPTCP 77 + socket. In-kernel path management will control subflow 78 + connections and address advertisements according to 79 + per-namespace values configured over the MPTCP netlink 80 + API. Userspace path management puts per-MPTCP-connection subflow 81 + connection decisions and address advertisements under control of 82 + a privileged userspace program, at the cost of more netlink 83 + traffic to propagate all of the related events and commands. 84 + 85 + This is a per-namespace sysctl. 86 + 87 + * "kernel" - In-kernel path manager 88 + * "userspace" - Userspace path manager 89 + 90 + Default: "kernel" 91 + 79 92 pm_type - INTEGER 80 93 Set the default path manager type to use for each new MPTCP 81 94 socket. In-kernel path management will control subflow ··· 104 83 traffic to propagate all of the related events and commands. 105 84 106 85 This is a per-namespace sysctl. 86 + 87 + Deprecated since v6.15, use path_manager instead. 107 88 108 89 * 0 - In-kernel path manager 109 90 * 1 - Userspace path manager
+14
include/net/mptcp.h
··· 14 14 15 15 struct mptcp_info; 16 16 struct mptcp_sock; 17 + struct mptcp_pm_addr_entry; 17 18 struct seq_file; 18 19 19 20 /* MPTCP sk_buff extension data */ ··· 115 114 struct mptcp_sched_data *data); 116 115 117 116 char name[MPTCP_SCHED_NAME_MAX]; 117 + struct module *owner; 118 + struct list_head list; 119 + 120 + void (*init)(struct mptcp_sock *msk); 121 + void (*release)(struct mptcp_sock *msk); 122 + } ____cacheline_aligned_in_smp; 123 + 124 + #define MPTCP_PM_NAME_MAX 16 125 + #define MPTCP_PM_MAX 128 126 + #define MPTCP_PM_BUF_MAX (MPTCP_PM_NAME_MAX * MPTCP_PM_MAX) 127 + 128 + struct mptcp_pm_ops { 129 + char name[MPTCP_PM_NAME_MAX]; 118 130 struct module *owner; 119 131 struct list_head list; 120 132
+112 -1
net/mptcp/ctrl.c
··· 39 39 u8 allow_join_initial_addr_port; 40 40 u8 pm_type; 41 41 char scheduler[MPTCP_SCHED_NAME_MAX]; 42 + char path_manager[MPTCP_PM_NAME_MAX]; 42 43 }; 43 44 44 45 static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) ··· 84 83 return mptcp_get_pernet(net)->pm_type; 85 84 } 86 85 86 + const char *mptcp_get_path_manager(const struct net *net) 87 + { 88 + return mptcp_get_pernet(net)->path_manager; 89 + } 90 + 87 91 const char *mptcp_get_scheduler(const struct net *net) 88 92 { 89 93 return mptcp_get_pernet(net)->scheduler; ··· 107 101 pernet->stale_loss_cnt = 4; 108 102 pernet->pm_type = MPTCP_PM_TYPE_KERNEL; 109 103 strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler)); 104 + strscpy(pernet->path_manager, "kernel", sizeof(pernet->path_manager)); 110 105 } 111 106 112 107 #ifdef CONFIG_SYSCTL ··· 181 174 return ret; 182 175 } 183 176 177 + static int mptcp_set_path_manager(char *path_manager, const char *name) 178 + { 179 + struct mptcp_pm_ops *pm_ops; 180 + int ret = 0; 181 + 182 + rcu_read_lock(); 183 + pm_ops = mptcp_pm_find(name); 184 + if (pm_ops) 185 + strscpy(path_manager, name, MPTCP_PM_NAME_MAX); 186 + else 187 + ret = -ENOENT; 188 + rcu_read_unlock(); 189 + 190 + return ret; 191 + } 192 + 193 + static int proc_path_manager(const struct ctl_table *ctl, int write, 194 + void *buffer, size_t *lenp, loff_t *ppos) 195 + { 196 + struct mptcp_pernet *pernet = container_of(ctl->data, 197 + struct mptcp_pernet, 198 + path_manager); 199 + char (*path_manager)[MPTCP_PM_NAME_MAX] = ctl->data; 200 + char pm_name[MPTCP_PM_NAME_MAX]; 201 + const struct ctl_table tbl = { 202 + .data = pm_name, 203 + .maxlen = MPTCP_PM_NAME_MAX, 204 + }; 205 + int ret; 206 + 207 + strscpy(pm_name, *path_manager, MPTCP_PM_NAME_MAX); 208 + 209 + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 210 + if (write && ret == 0) { 211 + ret = mptcp_set_path_manager(*path_manager, pm_name); 212 + if (ret == 0) { 213 + u8 pm_type = __MPTCP_PM_TYPE_NR; 214 + 215 + if 
(strncmp(pm_name, "kernel", MPTCP_PM_NAME_MAX) == 0) 216 + pm_type = MPTCP_PM_TYPE_KERNEL; 217 + else if (strncmp(pm_name, "userspace", MPTCP_PM_NAME_MAX) == 0) 218 + pm_type = MPTCP_PM_TYPE_USERSPACE; 219 + pernet->pm_type = pm_type; 220 + } 221 + } 222 + 223 + return ret; 224 + } 225 + 226 + static int proc_pm_type(const struct ctl_table *ctl, int write, 227 + void *buffer, size_t *lenp, loff_t *ppos) 228 + { 229 + struct mptcp_pernet *pernet = container_of(ctl->data, 230 + struct mptcp_pernet, 231 + pm_type); 232 + int ret; 233 + 234 + ret = proc_dou8vec_minmax(ctl, write, buffer, lenp, ppos); 235 + if (write && ret == 0) { 236 + u8 pm_type = READ_ONCE(*(u8 *)ctl->data); 237 + char *pm_name = ""; 238 + 239 + if (pm_type == MPTCP_PM_TYPE_KERNEL) 240 + pm_name = "kernel"; 241 + else if (pm_type == MPTCP_PM_TYPE_USERSPACE) 242 + pm_name = "userspace"; 243 + mptcp_set_path_manager(pernet->path_manager, pm_name); 244 + } 245 + 246 + return ret; 247 + } 248 + 249 + static int proc_available_path_managers(const struct ctl_table *ctl, 250 + int write, void *buffer, 251 + size_t *lenp, loff_t *ppos) 252 + { 253 + struct ctl_table tbl = { .maxlen = MPTCP_PM_BUF_MAX, }; 254 + int ret; 255 + 256 + tbl.data = kmalloc(tbl.maxlen, GFP_USER); 257 + if (!tbl.data) 258 + return -ENOMEM; 259 + 260 + mptcp_pm_get_available(tbl.data, MPTCP_PM_BUF_MAX); 261 + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 262 + kfree(tbl.data); 263 + 264 + return ret; 265 + } 266 + 184 267 static struct ctl_table mptcp_sysctl_table[] = { 185 268 { 186 269 .procname = "enabled", ··· 315 218 .procname = "pm_type", 316 219 .maxlen = sizeof(u8), 317 220 .mode = 0644, 318 - .proc_handler = proc_dou8vec_minmax, 221 + .proc_handler = proc_pm_type, 319 222 .extra1 = SYSCTL_ZERO, 320 223 .extra2 = &mptcp_pm_type_max 321 224 }, ··· 350 253 .mode = 0644, 351 254 .proc_handler = proc_dou8vec_minmax, 352 255 }, 256 + { 257 + .procname = "path_manager", 258 + .maxlen = MPTCP_PM_NAME_MAX, 259 + .mode = 0644, 
260 + .proc_handler = proc_path_manager, 261 + }, 262 + { 263 + .procname = "available_path_managers", 264 + .maxlen = MPTCP_PM_BUF_MAX, 265 + .mode = 0444, 266 + .proc_handler = proc_available_path_managers, 267 + }, 353 268 }; 354 269 355 270 static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) ··· 387 278 table[8].data = &pernet->close_timeout; 388 279 table[9].data = &pernet->blackhole_timeout; 389 280 table[10].data = &pernet->syn_retrans_before_tcp_fallback; 281 + table[11].data = &pernet->path_manager; 282 + /* table[12] is for available_path_managers which is read-only info */ 390 283 391 284 hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, 392 285 ARRAY_SIZE(mptcp_sysctl_table));
+81 -16
net/mptcp/pm.c
··· 5 5 */ 6 6 #define pr_fmt(fmt) "MPTCP: " fmt 7 7 8 + #include <linux/rculist.h> 9 + #include <linux/spinlock.h> 8 10 #include "protocol.h" 9 11 #include "mib.h" 10 12 ··· 19 17 struct timer_list add_timer; 20 18 struct mptcp_sock *sock; 21 19 }; 20 + 21 + static DEFINE_SPINLOCK(mptcp_pm_list_lock); 22 + static LIST_HEAD(mptcp_pm_list); 22 23 23 24 /* path manager helpers */ 24 25 ··· 516 511 * be sure to serve this event only once. 517 512 */ 518 513 if (READ_ONCE(pm->work_pending) && 519 - !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) 514 + !(pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) 520 515 mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); 521 516 522 - if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) 517 + if ((pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) 523 518 announce = true; 524 519 525 - msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); 520 + pm->status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); 526 521 spin_unlock_bh(&pm->lock); 527 522 528 523 if (announce) ··· 983 978 u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk)); 984 979 struct mptcp_pm_data *pm = &msk->pm; 985 980 986 - pm->add_addr_signaled = 0; 987 - pm->add_addr_accepted = 0; 988 - pm->local_addr_used = 0; 989 - pm->subflows = 0; 981 + memset(&pm->reset, 0, sizeof(pm->reset)); 990 982 pm->rm_list_tx.nr = 0; 991 983 pm->rm_list_rx.nr = 0; 992 984 WRITE_ONCE(pm->pm_type, pm_type); ··· 1002 1000 !!mptcp_pm_get_add_addr_accept_max(msk) && 1003 1001 subflows_allowed); 1004 1002 WRITE_ONCE(pm->accept_subflow, subflows_allowed); 1005 - } else { 1006 - WRITE_ONCE(pm->work_pending, 0); 1007 - WRITE_ONCE(pm->accept_addr, 0); 1008 - WRITE_ONCE(pm->accept_subflow, 0); 1009 - } 1010 1003 1011 - WRITE_ONCE(pm->addr_signal, 0); 1012 - WRITE_ONCE(pm->remote_deny_join_id0, false); 1013 - pm->status = 0; 1014 - bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1004 + bitmap_fill(pm->id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1005 + } 1015 1006 } 
1016 1007 1017 1008 void mptcp_pm_data_init(struct mptcp_sock *msk) ··· 1017 1022 1018 1023 void __init mptcp_pm_init(void) 1019 1024 { 1025 + mptcp_pm_kernel_register(); 1026 + mptcp_pm_userspace_register(); 1020 1027 mptcp_pm_nl_init(); 1028 + } 1029 + 1030 + /* Must be called with rcu read lock held */ 1031 + struct mptcp_pm_ops *mptcp_pm_find(const char *name) 1032 + { 1033 + struct mptcp_pm_ops *pm_ops; 1034 + 1035 + list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) { 1036 + if (!strcmp(pm_ops->name, name)) 1037 + return pm_ops; 1038 + } 1039 + 1040 + return NULL; 1041 + } 1042 + 1043 + int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops) 1044 + { 1045 + return 0; 1046 + } 1047 + 1048 + int mptcp_pm_register(struct mptcp_pm_ops *pm_ops) 1049 + { 1050 + int ret; 1051 + 1052 + ret = mptcp_pm_validate(pm_ops); 1053 + if (ret) 1054 + return ret; 1055 + 1056 + spin_lock(&mptcp_pm_list_lock); 1057 + if (mptcp_pm_find(pm_ops->name)) { 1058 + spin_unlock(&mptcp_pm_list_lock); 1059 + return -EEXIST; 1060 + } 1061 + list_add_tail_rcu(&pm_ops->list, &mptcp_pm_list); 1062 + spin_unlock(&mptcp_pm_list_lock); 1063 + 1064 + pr_debug("%s registered\n", pm_ops->name); 1065 + return 0; 1066 + } 1067 + 1068 + void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops) 1069 + { 1070 + /* skip unregistering the default path manager */ 1071 + if (WARN_ON_ONCE(pm_ops == &mptcp_pm_kernel)) 1072 + return; 1073 + 1074 + spin_lock(&mptcp_pm_list_lock); 1075 + list_del_rcu(&pm_ops->list); 1076 + spin_unlock(&mptcp_pm_list_lock); 1077 + } 1078 + 1079 + /* Build string with list of available path manager values. 1080 + * Similar to tcp_get_available_congestion_control() 1081 + */ 1082 + void mptcp_pm_get_available(char *buf, size_t maxlen) 1083 + { 1084 + struct mptcp_pm_ops *pm_ops; 1085 + size_t offs = 0; 1086 + 1087 + rcu_read_lock(); 1088 + list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) { 1089 + offs += snprintf(buf + offs, maxlen - offs, "%s%s", 1090 + offs == 0 ? 
"" : " ", pm_ops->name); 1091 + 1092 + if (WARN_ON_ONCE(offs >= maxlen)) 1093 + break; 1094 + } 1095 + rcu_read_unlock(); 1021 1096 }
+9 -7
net/mptcp/pm_kernel.c
··· 710 710 return ret; 711 711 712 712 /* address not found, add to local list */ 713 - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 713 + entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC); 714 714 if (!entry) 715 715 return -ENOMEM; 716 716 717 - *entry = *skc; 718 717 entry->addr.port = 0; 719 718 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); 720 719 if (ret < 0) ··· 816 817 return -EINVAL; 817 818 } 818 819 819 - entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); 820 + entry = kmemdup(&addr, sizeof(addr), GFP_KERNEL_ACCOUNT); 820 821 if (!entry) { 821 822 GENL_SET_ERR_MSG(info, "can't allocate addr"); 822 823 return -ENOMEM; 823 824 } 824 825 825 - *entry = addr; 826 826 if (entry->addr.port) { 827 827 ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); 828 828 if (ret) { ··· 1398 1400 .size = sizeof(struct pm_nl_pernet), 1399 1401 }; 1400 1402 1401 - void __init mptcp_pm_nl_init(void) 1403 + struct mptcp_pm_ops mptcp_pm_kernel = { 1404 + .name = "kernel", 1405 + .owner = THIS_MODULE, 1406 + }; 1407 + 1408 + void __init mptcp_pm_kernel_register(void) 1402 1409 { 1403 1410 if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) 1404 1411 panic("Failed to register MPTCP PM pernet subsystem.\n"); 1405 1412 1406 - if (genl_register_family(&mptcp_genl_family)) 1407 - panic("Failed to register MPTCP PM netlink family\n"); 1413 + mptcp_pm_register(&mptcp_pm_kernel); 1408 1414 }
+6
net/mptcp/pm_netlink.c
··· 625 625 .mcgrps = mptcp_pm_mcgrps, 626 626 .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), 627 627 }; 628 + 629 + void __init mptcp_pm_nl_init(void) 630 + { 631 + if (genl_register_family(&mptcp_genl_family)) 632 + panic("Failed to register MPTCP PM netlink family\n"); 633 + }
+10
net/mptcp/pm_userspace.c
··· 682 682 sock_put(sk); 683 683 return ret; 684 684 } 685 + 686 + static struct mptcp_pm_ops mptcp_pm_userspace = { 687 + .name = "userspace", 688 + .owner = THIS_MODULE, 689 + }; 690 + 691 + void __init mptcp_pm_userspace_register(void) 692 + { 693 + mptcp_pm_register(&mptcp_pm_userspace); 694 + }
+17
net/mptcp/protocol.h
··· 223 223 224 224 spinlock_t lock; /*protects the whole PM data */ 225 225 226 + struct_group(reset, 227 + 226 228 u8 addr_signal; 227 229 bool server_side; 228 230 bool work_pending; ··· 237 235 u8 pm_type; 238 236 u8 subflows; 239 237 u8 status; 238 + 239 + ); 240 + 240 241 DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 241 242 struct mptcp_rm_list rm_list_tx; 242 243 struct mptcp_rm_list rm_list_rx; ··· 699 694 unsigned int mptcp_stale_loss_cnt(const struct net *net); 700 695 unsigned int mptcp_close_timeout(const struct sock *sk); 701 696 int mptcp_get_pm_type(const struct net *net); 697 + const char *mptcp_get_path_manager(const struct net *net); 702 698 const char *mptcp_get_scheduler(const struct net *net); 703 699 704 700 void mptcp_active_disable(struct sock *sk); ··· 1051 1045 void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, 1052 1046 struct mptcp_pm_addr_entry *entry); 1053 1047 1048 + /* the default path manager, used in mptcp_pm_unregister */ 1049 + extern struct mptcp_pm_ops mptcp_pm_kernel; 1050 + 1051 + struct mptcp_pm_ops *mptcp_pm_find(const char *name); 1052 + int mptcp_pm_register(struct mptcp_pm_ops *pm_ops); 1053 + void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops); 1054 + int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops); 1055 + void mptcp_pm_get_available(char *buf, size_t maxlen); 1056 + 1054 1057 void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk); 1055 1058 1056 1059 void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, ··· 1162 1147 return local_id; 1163 1148 } 1164 1149 1150 + void __init mptcp_pm_kernel_register(void); 1151 + void __init mptcp_pm_userspace_register(void); 1165 1152 void __init mptcp_pm_nl_init(void); 1166 1153 void mptcp_pm_worker(struct mptcp_sock *msk); 1167 1154 void __mptcp_pm_kernel_worker(struct mptcp_sock *msk);
+29 -1
tools/testing/selftests/net/mptcp/userspace_pm.sh
··· 117 117 trap cleanup EXIT 118 118 119 119 # Create and configure network namespaces for testing 120 + print_title "Init" 120 121 mptcp_lib_ns_init ns1 ns2 122 + 123 + # check path_manager and pm_type sysctl mapping 124 + if [ -f /proc/sys/net/mptcp/path_manager ]; then 125 + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace 126 + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" 127 + if [ "${pm_type}" != "1" ]; then 128 + test_fail "unexpected pm_type: ${pm_type}" 129 + mptcp_lib_result_print_all_tap 130 + exit ${KSFT_FAIL} 131 + fi 132 + 133 + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null 134 + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" 135 + if [ "${pm_type}" != "1" ]; then 136 + test_fail "unexpected pm_type after error: ${pm_type}" 137 + mptcp_lib_result_print_all_tap 138 + exit ${KSFT_FAIL} 139 + fi 140 + 141 + ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0 142 + pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)" 143 + if [ "${pm_name}" != "kernel" ]; then 144 + test_fail "unexpected path-manager: ${pm_name}" 145 + mptcp_lib_result_print_all_tap 146 + exit ${KSFT_FAIL} 147 + fi 148 + fi 149 + 121 150 for i in "$ns1" "$ns2" ;do 122 151 ip netns exec "$i" sysctl -q net.mptcp.pm_type=1 123 152 done ··· 181 152 sleep 0.5 182 153 mptcp_lib_subtests_last_ts_reset 183 154 184 - print_title "Init" 185 155 print_test "Created network namespaces ns1, ns2" 186 156 test_pass 187 157