Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

vsock: add netns to vsock core

Add netns logic to vsock core. Additionally, modify transport hook
prototypes to be used by later transport-specific patches (e.g.,
*_seqpacket_allow()).

Namespaces are supported primarily by changing socket lookup functions
(e.g., vsock_find_connected_socket()) to take into account the socket
namespace and the namespace mode before considering a candidate socket a
"match".

This patch also introduces two sysctls: /proc/sys/net/vsock/ns_mode to
report the mode of the current namespace, and
/proc/sys/net/vsock/child_ns_mode to set the mode for new namespaces.

Add netns functionality (initialization, passing to transports, procfs,
etc.) to the af_vsock socket layer. Later patches that add netns
support to transports depend on this patch.

This patch changes the allocation of random ports for connectible vsocks
in order to avoid leaking the random port range starting point to other
namespaces.

dgram_allow(), stream_allow(), and seqpacket_allow() callbacks are
modified to take a vsk in order to perform logic on namespace modes. In
future patches, the socket's network namespace (obtained from the vsk)
will also be used for socket lookups in these functions.

Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
Link: https://patch.msgid.link/20260121-vsock-vmtest-v16-1-2859a7512097@meta.com
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Bobby Eshleman and committed by
Paolo Abeni
eafb64f4 3eef6c06

+437 -51
+1
MAINTAINERS
··· 27556 27556 S: Maintained 27557 27557 F: drivers/vhost/vsock.c 27558 27558 F: include/linux/virtio_vsock.h 27559 + F: include/net/netns/vsock.h 27559 27560 F: include/uapi/linux/virtio_vsock.h 27560 27561 F: net/vmw_vsock/virtio_transport.c 27561 27562 F: net/vmw_vsock/virtio_transport_common.c
+4 -2
drivers/vhost/vsock.c
··· 407 407 return true; 408 408 } 409 409 410 - static bool vhost_transport_seqpacket_allow(u32 remote_cid); 410 + static bool vhost_transport_seqpacket_allow(struct vsock_sock *vsk, 411 + u32 remote_cid); 411 412 412 413 static struct virtio_transport vhost_transport = { 413 414 .transport = { ··· 464 463 .send_pkt = vhost_transport_send_pkt, 465 464 }; 466 465 467 - static bool vhost_transport_seqpacket_allow(u32 remote_cid) 466 + static bool vhost_transport_seqpacket_allow(struct vsock_sock *vsk, 467 + u32 remote_cid) 468 468 { 469 469 struct vhost_vsock *vsock; 470 470 bool seqpacket_allow = false;
+2 -2
include/linux/virtio_vsock.h
··· 256 256 257 257 u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); 258 258 bool virtio_transport_stream_is_active(struct vsock_sock *vsk); 259 - bool virtio_transport_stream_allow(u32 cid, u32 port); 259 + bool virtio_transport_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port); 260 260 int virtio_transport_dgram_bind(struct vsock_sock *vsk, 261 261 struct sockaddr_vm *addr); 262 - bool virtio_transport_dgram_allow(u32 cid, u32 port); 262 + bool virtio_transport_dgram_allow(struct vsock_sock *vsk, u32 cid, u32 port); 263 263 264 264 int virtio_transport_connect(struct vsock_sock *vsk); 265 265
+58 -3
include/net/af_vsock.h
··· 10 10 11 11 #include <linux/kernel.h> 12 12 #include <linux/workqueue.h> 13 + #include <net/netns/vsock.h> 13 14 #include <net/sock.h> 14 15 #include <uapi/linux/vm_sockets.h> 15 16 ··· 125 124 size_t len, int flags); 126 125 int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, 127 126 struct msghdr *, size_t len); 128 - bool (*dgram_allow)(u32 cid, u32 port); 127 + bool (*dgram_allow)(struct vsock_sock *vsk, u32 cid, u32 port); 129 128 130 129 /* STREAM. */ 131 130 /* TODO: stream_bind() */ ··· 137 136 s64 (*stream_has_space)(struct vsock_sock *); 138 137 u64 (*stream_rcvhiwat)(struct vsock_sock *); 139 138 bool (*stream_is_active)(struct vsock_sock *); 140 - bool (*stream_allow)(u32 cid, u32 port); 139 + bool (*stream_allow)(struct vsock_sock *vsk, u32 cid, u32 port); 141 140 142 141 /* SEQ_PACKET. */ 143 142 ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, 144 143 int flags); 145 144 int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg, 146 145 size_t len); 147 - bool (*seqpacket_allow)(u32 remote_cid); 146 + bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid); 148 147 u32 (*seqpacket_has_data)(struct vsock_sock *vsk); 149 148 150 149 /* Notification. 
*/ ··· 217 216 struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); 218 217 struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, 219 218 struct sockaddr_vm *dst); 219 + struct sock *vsock_find_bound_socket_net(struct sockaddr_vm *addr, 220 + struct net *net); 221 + struct sock *vsock_find_connected_socket_net(struct sockaddr_vm *src, 222 + struct sockaddr_vm *dst, 223 + struct net *net); 220 224 void vsock_remove_sock(struct vsock_sock *vsk); 221 225 void vsock_for_each_connected_socket(struct vsock_transport *transport, 222 226 void (*fn)(struct sock *sk)); ··· 261 255 static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t) 262 256 { 263 257 return t->msgzerocopy_allow && t->msgzerocopy_allow(); 258 + } 259 + 260 + static inline enum vsock_net_mode vsock_net_mode(struct net *net) 261 + { 262 + if (!net) 263 + return VSOCK_NET_MODE_GLOBAL; 264 + 265 + return READ_ONCE(net->vsock.mode); 266 + } 267 + 268 + static inline bool vsock_net_mode_global(struct vsock_sock *vsk) 269 + { 270 + return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL; 271 + } 272 + 273 + static inline void vsock_net_set_child_mode(struct net *net, 274 + enum vsock_net_mode mode) 275 + { 276 + WRITE_ONCE(net->vsock.child_ns_mode, mode); 277 + } 278 + 279 + static inline enum vsock_net_mode vsock_net_child_mode(struct net *net) 280 + { 281 + return READ_ONCE(net->vsock.child_ns_mode); 282 + } 283 + 284 + /* Return true if two namespaces pass the mode rules. Otherwise, return false. 285 + * 286 + * A NULL namespace is treated as VSOCK_NET_MODE_GLOBAL. 287 + * 288 + * Read more about modes in the comment header of net/vmw_vsock/af_vsock.c. 289 + */ 290 + static inline bool vsock_net_check_mode(struct net *ns0, struct net *ns1) 291 + { 292 + enum vsock_net_mode mode0, mode1; 293 + 294 + /* Any vsocks within the same network namespace are always reachable, 295 + * regardless of the mode. 
296 + */ 297 + if (net_eq(ns0, ns1)) 298 + return true; 299 + 300 + mode0 = vsock_net_mode(ns0); 301 + mode1 = vsock_net_mode(ns1); 302 + 303 + /* Different namespaces are only reachable if they are both 304 + * global mode. 305 + */ 306 + return mode0 == VSOCK_NET_MODE_GLOBAL && mode0 == mode1; 264 307 } 265 308 #endif /* __AF_VSOCK_H__ */
+4
include/net/net_namespace.h
··· 37 37 #include <net/netns/smc.h> 38 38 #include <net/netns/bpf.h> 39 39 #include <net/netns/mctp.h> 40 + #include <net/netns/vsock.h> 40 41 #include <net/net_trackers.h> 41 42 #include <linux/ns_common.h> 42 43 #include <linux/idr.h> ··· 196 195 #ifdef CONFIG_DEBUG_NET_SMALL_RTNL 197 196 /* Move to a better place when the config guard is removed. */ 198 197 struct mutex rtnl_mutex; 198 + #endif 199 + #if IS_ENABLED(CONFIG_VSOCKETS) 200 + struct netns_vsock vsock; 199 201 #endif 200 202 } __randomize_layout; 201 203
+21
include/net/netns/vsock.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __NET_NET_NAMESPACE_VSOCK_H 3 + #define __NET_NET_NAMESPACE_VSOCK_H 4 + 5 + #include <linux/types.h> 6 + 7 + enum vsock_net_mode { 8 + VSOCK_NET_MODE_GLOBAL, 9 + VSOCK_NET_MODE_LOCAL, 10 + }; 11 + 12 + struct netns_vsock { 13 + struct ctl_table_header *sysctl_hdr; 14 + 15 + /* protected by the vsock_table_lock in af_vsock.c */ 16 + u32 port; 17 + 18 + enum vsock_net_mode mode; 19 + enum vsock_net_mode child_ns_mode; 20 + }; 21 + #endif /* __NET_NET_NAMESPACE_VSOCK_H */
+308 -27
net/vmw_vsock/af_vsock.c
··· 83 83 * TCP_ESTABLISHED - connected 84 84 * TCP_CLOSING - disconnecting 85 85 * TCP_LISTEN - listening 86 + * 87 + * - Namespaces in vsock support two different modes: "local" and "global". 88 + * Each mode defines how the namespace interacts with CIDs. 89 + * Each namespace exposes two sysctl files: 90 + * 91 + * - /proc/sys/net/vsock/ns_mode (read-only) reports the current namespace's 92 + * mode, which is set at namespace creation and immutable thereafter. 93 + * - /proc/sys/net/vsock/child_ns_mode (writable) controls what mode future 94 + * child namespaces will inherit when created. The default is "global". 95 + * 96 + * Changing child_ns_mode only affects newly created namespaces, not the 97 + * current namespace or existing children. At namespace creation, ns_mode 98 + * is inherited from the parent's child_ns_mode. 99 + * 100 + * The init_net mode is "global" and cannot be modified. 101 + * 102 + * The modes affect the allocation and accessibility of CIDs as follows: 103 + * 104 + * - global - access and allocation are all system-wide 105 + * - all CID allocation from global namespaces draw from the same 106 + * system-wide pool. 107 + * - if one global namespace has already allocated some CID, another 108 + * global namespace will not be able to allocate the same CID. 109 + * - global mode AF_VSOCK sockets can reach any VM or socket in any global 110 + * namespace, they are not contained to only their own namespace. 111 + * - AF_VSOCK sockets in a global mode namespace cannot reach VMs or 112 + * sockets in any local mode namespace. 113 + * - local - access and allocation are contained within the namespace 114 + * - CID allocation draws only from a private pool local only to the 115 + * namespace, and does not affect the CIDs available for allocation in any 116 + * other namespace (global or local). 117 + * - VMs in a local namespace do not collide with CIDs in any other local 118 + * namespace or any global namespace. 
For example, if a VM in a local mode 119 + * namespace is given CID 10, then CID 10 is still available for 120 + * allocation in any other namespace, but not in the same namespace. 121 + * - AF_VSOCK sockets in a local mode namespace can connect only to VMs or 122 + * other sockets within their own namespace. 123 + * - sockets bound to VMADDR_CID_ANY in local namespaces will never resolve 124 + * to any transport that is not compatible with local mode. There is no 125 + * error that propagates to the user (as there is for connection attempts) 126 + * because it is possible for some packet to reach this socket from 127 + * a different transport that *does* support local mode. For 128 + * example, virtio-vsock may not support local mode, but the socket 129 + * may still accept a connection from vhost-vsock which does. 86 130 */ 87 131 88 132 #include <linux/compat.h> ··· 144 100 #include <linux/module.h> 145 101 #include <linux/mutex.h> 146 102 #include <linux/net.h> 103 + #include <linux/proc_fs.h> 147 104 #include <linux/poll.h> 148 105 #include <linux/random.h> 149 106 #include <linux/skbuff.h> 150 107 #include <linux/smp.h> 151 108 #include <linux/socket.h> 152 109 #include <linux/stddef.h> 110 + #include <linux/sysctl.h> 153 111 #include <linux/unistd.h> 154 112 #include <linux/wait.h> 155 113 #include <linux/workqueue.h> 156 114 #include <net/sock.h> 157 115 #include <net/af_vsock.h> 116 + #include <net/netns/vsock.h> 158 117 #include <uapi/linux/vm_sockets.h> 159 118 #include <uapi/asm-generic/ioctls.h> 119 + 120 + #define VSOCK_NET_MODE_STR_GLOBAL "global" 121 + #define VSOCK_NET_MODE_STR_LOCAL "local" 122 + 123 + /* 6 chars for "global", 1 for null-terminator, and 1 more for '\n'. 124 + * The newline is added by proc_dostring() for read operations. 
125 + */ 126 + #define VSOCK_NET_MODE_STR_MAX 8 160 127 161 128 static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); 162 129 static void vsock_sk_destruct(struct sock *sk); ··· 290 235 sock_put(&vsk->sk); 291 236 } 292 237 293 - static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) 238 + static struct sock *__vsock_find_bound_socket_net(struct sockaddr_vm *addr, 239 + struct net *net) 294 240 { 295 241 struct vsock_sock *vsk; 296 242 297 243 list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) { 298 - if (vsock_addr_equals_addr(addr, &vsk->local_addr)) 299 - return sk_vsock(vsk); 244 + struct sock *sk = sk_vsock(vsk); 245 + 246 + if (vsock_addr_equals_addr(addr, &vsk->local_addr) && 247 + vsock_net_check_mode(sock_net(sk), net)) 248 + return sk; 300 249 301 250 if (addr->svm_port == vsk->local_addr.svm_port && 302 251 (vsk->local_addr.svm_cid == VMADDR_CID_ANY || 303 - addr->svm_cid == VMADDR_CID_ANY)) 304 - return sk_vsock(vsk); 252 + addr->svm_cid == VMADDR_CID_ANY) && 253 + vsock_net_check_mode(sock_net(sk), net)) 254 + return sk; 305 255 } 306 256 307 257 return NULL; 308 258 } 309 259 310 - static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, 311 - struct sockaddr_vm *dst) 260 + static struct sock * 261 + __vsock_find_connected_socket_net(struct sockaddr_vm *src, 262 + struct sockaddr_vm *dst, struct net *net) 312 263 { 313 264 struct vsock_sock *vsk; 314 265 315 266 list_for_each_entry(vsk, vsock_connected_sockets(src, dst), 316 267 connected_table) { 268 + struct sock *sk = sk_vsock(vsk); 269 + 317 270 if (vsock_addr_equals_addr(src, &vsk->remote_addr) && 318 - dst->svm_port == vsk->local_addr.svm_port) { 319 - return sk_vsock(vsk); 271 + dst->svm_port == vsk->local_addr.svm_port && 272 + vsock_net_check_mode(sock_net(sk), net)) { 273 + return sk; 320 274 } 321 275 } 322 276 ··· 368 304 } 369 305 EXPORT_SYMBOL_GPL(vsock_remove_connected); 370 306 371 - struct sock 
*vsock_find_bound_socket(struct sockaddr_vm *addr) 307 + /* Find a bound socket, filtering by namespace and namespace mode. 308 + * 309 + * Use this in transports that are namespace-aware and can provide the 310 + * network namespace context. 311 + */ 312 + struct sock *vsock_find_bound_socket_net(struct sockaddr_vm *addr, 313 + struct net *net) 372 314 { 373 315 struct sock *sk; 374 316 375 317 spin_lock_bh(&vsock_table_lock); 376 - sk = __vsock_find_bound_socket(addr); 318 + sk = __vsock_find_bound_socket_net(addr, net); 377 319 if (sk) 378 320 sock_hold(sk); 379 321 ··· 387 317 388 318 return sk; 389 319 } 320 + EXPORT_SYMBOL_GPL(vsock_find_bound_socket_net); 321 + 322 + /* Find a bound socket without namespace filtering. 323 + * 324 + * Use this in transports that lack namespace context. All sockets are 325 + * treated as if in global mode. 326 + */ 327 + struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) 328 + { 329 + return vsock_find_bound_socket_net(addr, NULL); 330 + } 390 331 EXPORT_SYMBOL_GPL(vsock_find_bound_socket); 391 332 392 - struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, 393 - struct sockaddr_vm *dst) 333 + /* Find a connected socket, filtering by namespace and namespace mode. 334 + * 335 + * Use this in transports that are namespace-aware and can provide the 336 + * network namespace context. 337 + */ 338 + struct sock *vsock_find_connected_socket_net(struct sockaddr_vm *src, 339 + struct sockaddr_vm *dst, 340 + struct net *net) 394 341 { 395 342 struct sock *sk; 396 343 397 344 spin_lock_bh(&vsock_table_lock); 398 - sk = __vsock_find_connected_socket(src, dst); 345 + sk = __vsock_find_connected_socket_net(src, dst, net); 399 346 if (sk) 400 347 sock_hold(sk); 401 348 402 349 spin_unlock_bh(&vsock_table_lock); 403 350 404 351 return sk; 352 + } 353 + EXPORT_SYMBOL_GPL(vsock_find_connected_socket_net); 354 + 355 + /* Find a connected socket without namespace filtering. 
356 + * 357 + * Use this in transports that lack namespace context. All sockets are 358 + * treated as if in global mode. 359 + */ 360 + struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, 361 + struct sockaddr_vm *dst) 362 + { 363 + return vsock_find_connected_socket_net(src, dst, NULL); 405 364 } 406 365 EXPORT_SYMBOL_GPL(vsock_find_connected_socket); 407 366 ··· 627 528 628 529 if (sk->sk_type == SOCK_SEQPACKET) { 629 530 if (!new_transport->seqpacket_allow || 630 - !new_transport->seqpacket_allow(remote_cid)) { 531 + !new_transport->seqpacket_allow(vsk, remote_cid)) { 631 532 module_put(new_transport->module); 632 533 return -ESOCKTNOSUPPORT; 633 534 } ··· 775 676 static int __vsock_bind_connectible(struct vsock_sock *vsk, 776 677 struct sockaddr_vm *addr) 777 678 { 778 - static u32 port; 679 + struct net *net = sock_net(sk_vsock(vsk)); 779 680 struct sockaddr_vm new_addr; 780 681 781 - if (!port) 782 - port = get_random_u32_above(LAST_RESERVED_PORT); 682 + if (!net->vsock.port) 683 + net->vsock.port = get_random_u32_above(LAST_RESERVED_PORT); 783 684 784 685 vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); 785 686 ··· 788 689 unsigned int i; 789 690 790 691 for (i = 0; i < MAX_PORT_RETRIES; i++) { 791 - if (port == VMADDR_PORT_ANY || 792 - port <= LAST_RESERVED_PORT) 793 - port = LAST_RESERVED_PORT + 1; 692 + if (net->vsock.port == VMADDR_PORT_ANY || 693 + net->vsock.port <= LAST_RESERVED_PORT) 694 + net->vsock.port = LAST_RESERVED_PORT + 1; 794 695 795 - new_addr.svm_port = port++; 696 + new_addr.svm_port = net->vsock.port++; 796 697 797 - if (!__vsock_find_bound_socket(&new_addr)) { 698 + if (!__vsock_find_bound_socket_net(&new_addr, net)) { 798 699 found = true; 799 700 break; 800 701 } ··· 811 712 return -EACCES; 812 713 } 813 714 814 - if (__vsock_find_bound_socket(&new_addr)) 715 + if (__vsock_find_bound_socket_net(&new_addr, net)) 815 716 return -EADDRINUSE; 816 717 } 817 718 ··· 1413 1314 goto out; 1414 1315 } 1415 1316 1416 - 
if (!transport->dgram_allow(remote_addr->svm_cid, 1317 + if (!transport->dgram_allow(vsk, remote_addr->svm_cid, 1417 1318 remote_addr->svm_port)) { 1418 1319 err = -EINVAL; 1419 1320 goto out; ··· 1454 1355 if (err) 1455 1356 goto out; 1456 1357 1457 - if (!vsk->transport->dgram_allow(remote_addr->svm_cid, 1358 + if (!vsk->transport->dgram_allow(vsk, remote_addr->svm_cid, 1458 1359 remote_addr->svm_port)) { 1459 1360 err = -EINVAL; 1460 1361 goto out; ··· 1684 1585 * endpoints. 1685 1586 */ 1686 1587 if (!transport || 1687 - !transport->stream_allow(remote_addr->svm_cid, 1588 + !transport->stream_allow(vsk, remote_addr->svm_cid, 1688 1589 remote_addr->svm_port)) { 1689 1590 err = -ENETUNREACH; 1690 1591 goto out; ··· 2761 2662 .fops = &vsock_device_ops, 2762 2663 }; 2763 2664 2665 + static int __vsock_net_mode_string(const struct ctl_table *table, int write, 2666 + void *buffer, size_t *lenp, loff_t *ppos, 2667 + enum vsock_net_mode mode, 2668 + enum vsock_net_mode *new_mode) 2669 + { 2670 + char data[VSOCK_NET_MODE_STR_MAX] = {0}; 2671 + struct ctl_table tmp; 2672 + int ret; 2673 + 2674 + if (!table->data || !table->maxlen || !*lenp) { 2675 + *lenp = 0; 2676 + return 0; 2677 + } 2678 + 2679 + tmp = *table; 2680 + tmp.data = data; 2681 + 2682 + if (!write) { 2683 + const char *p; 2684 + 2685 + switch (mode) { 2686 + case VSOCK_NET_MODE_GLOBAL: 2687 + p = VSOCK_NET_MODE_STR_GLOBAL; 2688 + break; 2689 + case VSOCK_NET_MODE_LOCAL: 2690 + p = VSOCK_NET_MODE_STR_LOCAL; 2691 + break; 2692 + default: 2693 + WARN_ONCE(true, "netns has invalid vsock mode"); 2694 + *lenp = 0; 2695 + return 0; 2696 + } 2697 + 2698 + strscpy(data, p, sizeof(data)); 2699 + tmp.maxlen = strlen(p); 2700 + } 2701 + 2702 + ret = proc_dostring(&tmp, write, buffer, lenp, ppos); 2703 + if (ret || !write) 2704 + return ret; 2705 + 2706 + if (*lenp >= sizeof(data)) 2707 + return -EINVAL; 2708 + 2709 + if (!strncmp(data, VSOCK_NET_MODE_STR_GLOBAL, sizeof(data))) 2710 + *new_mode = VSOCK_NET_MODE_GLOBAL; 
2711 + else if (!strncmp(data, VSOCK_NET_MODE_STR_LOCAL, sizeof(data))) 2712 + *new_mode = VSOCK_NET_MODE_LOCAL; 2713 + else 2714 + return -EINVAL; 2715 + 2716 + return 0; 2717 + } 2718 + 2719 + static int vsock_net_mode_string(const struct ctl_table *table, int write, 2720 + void *buffer, size_t *lenp, loff_t *ppos) 2721 + { 2722 + struct net *net; 2723 + 2724 + if (write) 2725 + return -EPERM; 2726 + 2727 + net = current->nsproxy->net_ns; 2728 + 2729 + return __vsock_net_mode_string(table, write, buffer, lenp, ppos, 2730 + vsock_net_mode(net), NULL); 2731 + } 2732 + 2733 + static int vsock_net_child_mode_string(const struct ctl_table *table, int write, 2734 + void *buffer, size_t *lenp, loff_t *ppos) 2735 + { 2736 + enum vsock_net_mode new_mode; 2737 + struct net *net; 2738 + int ret; 2739 + 2740 + net = current->nsproxy->net_ns; 2741 + 2742 + ret = __vsock_net_mode_string(table, write, buffer, lenp, ppos, 2743 + vsock_net_child_mode(net), &new_mode); 2744 + if (ret) 2745 + return ret; 2746 + 2747 + if (write) 2748 + vsock_net_set_child_mode(net, new_mode); 2749 + 2750 + return 0; 2751 + } 2752 + 2753 + static struct ctl_table vsock_table[] = { 2754 + { 2755 + .procname = "ns_mode", 2756 + .data = &init_net.vsock.mode, 2757 + .maxlen = VSOCK_NET_MODE_STR_MAX, 2758 + .mode = 0444, 2759 + .proc_handler = vsock_net_mode_string 2760 + }, 2761 + { 2762 + .procname = "child_ns_mode", 2763 + .data = &init_net.vsock.child_ns_mode, 2764 + .maxlen = VSOCK_NET_MODE_STR_MAX, 2765 + .mode = 0644, 2766 + .proc_handler = vsock_net_child_mode_string 2767 + }, 2768 + }; 2769 + 2770 + static int __net_init vsock_sysctl_register(struct net *net) 2771 + { 2772 + struct ctl_table *table; 2773 + 2774 + if (net_eq(net, &init_net)) { 2775 + table = vsock_table; 2776 + } else { 2777 + table = kmemdup(vsock_table, sizeof(vsock_table), GFP_KERNEL); 2778 + if (!table) 2779 + goto err_alloc; 2780 + 2781 + table[0].data = &net->vsock.mode; 2782 + table[1].data = &net->vsock.child_ns_mode; 
2783 + } 2784 + 2785 + net->vsock.sysctl_hdr = register_net_sysctl_sz(net, "net/vsock", table, 2786 + ARRAY_SIZE(vsock_table)); 2787 + if (!net->vsock.sysctl_hdr) 2788 + goto err_reg; 2789 + 2790 + return 0; 2791 + 2792 + err_reg: 2793 + if (!net_eq(net, &init_net)) 2794 + kfree(table); 2795 + err_alloc: 2796 + return -ENOMEM; 2797 + } 2798 + 2799 + static void vsock_sysctl_unregister(struct net *net) 2800 + { 2801 + const struct ctl_table *table; 2802 + 2803 + table = net->vsock.sysctl_hdr->ctl_table_arg; 2804 + unregister_net_sysctl_table(net->vsock.sysctl_hdr); 2805 + if (!net_eq(net, &init_net)) 2806 + kfree(table); 2807 + } 2808 + 2809 + static void vsock_net_init(struct net *net) 2810 + { 2811 + if (net_eq(net, &init_net)) 2812 + net->vsock.mode = VSOCK_NET_MODE_GLOBAL; 2813 + else 2814 + net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns); 2815 + 2816 + net->vsock.child_ns_mode = VSOCK_NET_MODE_GLOBAL; 2817 + } 2818 + 2819 + static __net_init int vsock_sysctl_init_net(struct net *net) 2820 + { 2821 + vsock_net_init(net); 2822 + 2823 + if (vsock_sysctl_register(net)) 2824 + return -ENOMEM; 2825 + 2826 + return 0; 2827 + } 2828 + 2829 + static __net_exit void vsock_sysctl_exit_net(struct net *net) 2830 + { 2831 + vsock_sysctl_unregister(net); 2832 + } 2833 + 2834 + static struct pernet_operations vsock_sysctl_ops = { 2835 + .init = vsock_sysctl_init_net, 2836 + .exit = vsock_sysctl_exit_net, 2837 + }; 2838 + 2764 2839 static int __init vsock_init(void) 2765 2840 { 2766 2841 int err = 0; ··· 2962 2689 goto err_unregister_proto; 2963 2690 } 2964 2691 2692 + if (register_pernet_subsys(&vsock_sysctl_ops)) { 2693 + err = -ENOMEM; 2694 + goto err_unregister_sock; 2695 + } 2696 + 2965 2697 vsock_bpf_build_proto(); 2966 2698 2967 2699 return 0; 2968 2700 2701 + err_unregister_sock: 2702 + sock_unregister(AF_VSOCK); 2969 2703 err_unregister_proto: 2970 2704 proto_unregister(&vsock_proto); 2971 2705 err_deregister_misc: ··· 2986 2706 
misc_deregister(&vsock_device); 2987 2707 sock_unregister(AF_VSOCK); 2988 2708 proto_unregister(&vsock_proto); 2709 + unregister_pernet_subsys(&vsock_sysctl_ops); 2989 2710 } 2990 2711 2991 2712 const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
+5 -2
net/vmw_vsock/hyperv_transport.c
··· 570 570 return -EOPNOTSUPP; 571 571 } 572 572 573 - static bool hvs_dgram_allow(u32 cid, u32 port) 573 + static bool hvs_dgram_allow(struct vsock_sock *vsk, u32 cid, u32 port) 574 574 { 575 575 return false; 576 576 } ··· 745 745 return hvs->chan != NULL; 746 746 } 747 747 748 - static bool hvs_stream_allow(u32 cid, u32 port) 748 + static bool hvs_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port) 749 749 { 750 + if (!vsock_net_mode_global(vsk)) 751 + return false; 752 + 750 753 if (cid == VMADDR_CID_HOST) 751 754 return true; 752 755
+7 -2
net/vmw_vsock/virtio_transport.c
··· 536 536 return true; 537 537 } 538 538 539 - static bool virtio_transport_seqpacket_allow(u32 remote_cid); 539 + static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk, 540 + u32 remote_cid); 540 541 541 542 static struct virtio_transport virtio_transport = { 542 543 .transport = { ··· 594 593 .can_msgzerocopy = virtio_transport_can_msgzerocopy, 595 594 }; 596 595 597 - static bool virtio_transport_seqpacket_allow(u32 remote_cid) 596 + static bool 597 + virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid) 598 598 { 599 599 struct virtio_vsock *vsock; 600 600 bool seqpacket_allow; 601 + 602 + if (!vsock_net_mode_global(vsk)) 603 + return false; 601 604 602 605 seqpacket_allow = false; 603 606 rcu_read_lock();
+3 -3
net/vmw_vsock/virtio_transport_common.c
··· 1055 1055 } 1056 1056 EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); 1057 1057 1058 - bool virtio_transport_stream_allow(u32 cid, u32 port) 1058 + bool virtio_transport_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port) 1059 1059 { 1060 - return true; 1060 + return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL; 1061 1061 } 1062 1062 EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); 1063 1063 ··· 1068 1068 } 1069 1069 EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); 1070 1070 1071 - bool virtio_transport_dgram_allow(u32 cid, u32 port) 1071 + bool virtio_transport_dgram_allow(struct vsock_sock *vsk, u32 cid, u32 port) 1072 1072 { 1073 1073 return false; 1074 1074 }
+19 -7
net/vmw_vsock/vmci_transport.c
··· 646 646 return VMCI_SUCCESS; 647 647 } 648 648 649 - static bool vmci_transport_stream_allow(u32 cid, u32 port) 649 + static bool vmci_transport_stream_allow(struct vsock_sock *vsk, u32 cid, 650 + u32 port) 650 651 { 651 652 static const u32 non_socket_contexts[] = { 652 653 VMADDR_CID_LOCAL, 653 654 }; 654 655 int i; 656 + 657 + if (!vsock_net_mode_global(vsk)) 658 + return false; 655 659 656 660 BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts)); 657 661 ··· 686 682 err = VMCI_SUCCESS; 687 683 bh_process_pkt = false; 688 684 689 - /* Ignore incoming packets from contexts without sockets, or resources 690 - * that aren't vsock implementations. 685 + /* Ignore incoming packets from resources that aren't vsock 686 + * implementations. 691 687 */ 692 - 693 - if (!vmci_transport_stream_allow(dg->src.context, -1) 694 - || vmci_transport_peer_rid(dg->src.context) != dg->src.resource) 688 + if (vmci_transport_peer_rid(dg->src.context) != dg->src.resource) 695 689 return VMCI_ERROR_NO_ACCESS; 696 690 697 691 if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) ··· 747 745 */ 748 746 vsk = vsock_sk(sk); 749 747 if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) { 748 + err = VMCI_ERROR_NO_ACCESS; 749 + goto out; 750 + } 751 + 752 + /* Ignore incoming packets from contexts without sockets. */ 753 + if (!vmci_transport_stream_allow(vsk, dg->src.context, -1)) { 750 754 err = VMCI_ERROR_NO_ACCESS; 751 755 goto out; 752 756 } ··· 1792 1784 return err; 1793 1785 } 1794 1786 1795 - static bool vmci_transport_dgram_allow(u32 cid, u32 port) 1787 + static bool vmci_transport_dgram_allow(struct vsock_sock *vsk, u32 cid, 1788 + u32 port) 1796 1789 { 1790 + if (!vsock_net_mode_global(vsk)) 1791 + return false; 1792 + 1797 1793 if (cid == VMADDR_CID_HYPERVISOR) { 1798 1794 /* Registrations of PBRPC Servers do not modify VMX/Hypervisor 1799 1795 * state and are allowed.
+5 -3
net/vmw_vsock/vsock_loopback.c
··· 46 46 return 0; 47 47 } 48 48 49 - static bool vsock_loopback_seqpacket_allow(u32 remote_cid); 49 + static bool vsock_loopback_seqpacket_allow(struct vsock_sock *vsk, 50 + u32 remote_cid); 50 51 static bool vsock_loopback_msgzerocopy_allow(void) 51 52 { 52 53 return true; ··· 107 106 .send_pkt = vsock_loopback_send_pkt, 108 107 }; 109 108 110 - static bool vsock_loopback_seqpacket_allow(u32 remote_cid) 109 + static bool 110 + vsock_loopback_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid) 111 111 { 112 - return true; 112 + return vsock_net_mode_global(vsk); 113 113 } 114 114 115 115 static void vsock_loopback_work(struct work_struct *work)