Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-6.17-rc1.nsfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull namespace updates from Christian Brauner:
"This contains namespace updates. This time specifically for nsfs:

- Userspace heavily relies on the root inode numbers for namespaces
to identify the initial namespaces. That's already a hard
dependency. So we cannot change that anymore. Move the initial
inode numbers to a public header and align the only two namespaces
that currently don't do that with all the other namespaces.

- The root inode of /proc having a fixed inode number has been part
of the core kernel ABI since its inception, and recently some
userspace programs (mainly container runtimes) have started to
explicitly depend on this behaviour.

The main reason this is useful to userspace is that by checking
that a suspect /proc handle has fstype PROC_SUPER_MAGIC and is
PROCFS_ROOT_INO, they can then use openat2() together with
RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH} to ensure that there isn't a
bind-mount that replaces some procfs file with a different one.

This kind of attack has led to security issues in container
runtimes in the past (such as CVE-2019-19921) and libraries like
libpathrs[1] use this feature of procfs to provide safe procfs
handling functions"

* tag 'vfs-6.17-rc1.nsfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
uapi: export PROCFS_ROOT_INO
mntns: use stable inode number for initial mount ns
netns: use stable inode number for initial mount ns
nsfs: move root inode number to uapi

+47 -13
+3 -1
fs/namespace.c
··· 6090 6090 if (IS_ERR(mnt)) 6091 6091 panic("Can't create rootfs"); 6092 6092 6093 - ns = alloc_mnt_ns(&init_user_ns, false); 6093 + ns = alloc_mnt_ns(&init_user_ns, true); 6094 6094 if (IS_ERR(ns)) 6095 6095 panic("Can't allocate initial namespace"); 6096 + ns->seq = atomic64_inc_return(&mnt_ns_seq); 6097 + ns->ns.inum = PROC_MNT_INIT_INO; 6096 6098 m = real_mount(mnt); 6097 6099 ns->root = m; 6098 6100 ns->nr_mounts = 1;
+5 -5
fs/proc/root.c
··· 363 363 * This is the root "inode" in the /proc tree.. 364 364 */ 365 365 struct proc_dir_entry proc_root = { 366 - .low_ino = PROC_ROOT_INO, 367 - .namelen = 5, 368 - .mode = S_IFDIR | S_IRUGO | S_IXUGO, 369 - .nlink = 2, 366 + .low_ino = PROCFS_ROOT_INO, 367 + .namelen = 5, 368 + .mode = S_IFDIR | S_IRUGO | S_IXUGO, 369 + .nlink = 2, 370 370 .refcnt = REFCOUNT_INIT(1), 371 - .proc_iops = &proc_root_inode_operations, 371 + .proc_iops = &proc_root_inode_operations, 372 372 .proc_dir_ops = &proc_root_operations, 373 373 .parent = &proc_root, 374 374 .subdir = RB_ROOT,
+9 -7
include/linux/proc_ns.h
··· 6 6 #define _LINUX_PROC_NS_H 7 7 8 8 #include <linux/ns_common.h> 9 + #include <uapi/linux/nsfs.h> 9 10 10 11 struct pid_namespace; 11 12 struct nsset; ··· 40 39 * We always define these enumerators 41 40 */ 42 41 enum { 43 - PROC_ROOT_INO = 1, 44 - PROC_IPC_INIT_INO = 0xEFFFFFFFU, 45 - PROC_UTS_INIT_INO = 0xEFFFFFFEU, 46 - PROC_USER_INIT_INO = 0xEFFFFFFDU, 47 - PROC_PID_INIT_INO = 0xEFFFFFFCU, 48 - PROC_CGROUP_INIT_INO = 0xEFFFFFFBU, 49 - PROC_TIME_INIT_INO = 0xEFFFFFFAU, 42 + PROC_IPC_INIT_INO = IPC_NS_INIT_INO, 43 + PROC_UTS_INIT_INO = UTS_NS_INIT_INO, 44 + PROC_USER_INIT_INO = USER_NS_INIT_INO, 45 + PROC_PID_INIT_INO = PID_NS_INIT_INO, 46 + PROC_CGROUP_INIT_INO = CGROUP_NS_INIT_INO, 47 + PROC_TIME_INIT_INO = TIME_NS_INIT_INO, 48 + PROC_NET_INIT_INO = NET_NS_INIT_INO, 49 + PROC_MNT_INIT_INO = MNT_NS_INIT_INO, 50 50 }; 51 51 52 52 #ifdef CONFIG_PROC_FS
+11
include/uapi/linux/fs.h
··· 60 60 #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ 61 61 #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ 62 62 63 + /* 64 + * The root inode of procfs is guaranteed to always have the same inode number. 65 + * For programs that make heavy use of procfs, verifying that the root is a 66 + * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH}) 67 + * will allow you to make sure you are never tricked into operating on the 68 + * wrong procfs file. 69 + */ 70 + enum procfs_ino { 71 + PROCFS_ROOT_INO = 1, 72 + }; 73 + 63 74 struct file_clone_range { 64 75 __s64 src_fd; 65 76 __u64 src_offset;
+11
include/uapi/linux/nsfs.h
··· 42 42 /* Get previous namespace. */ 43 43 #define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) 44 44 45 + enum init_ns_ino { 46 + IPC_NS_INIT_INO = 0xEFFFFFFFU, 47 + UTS_NS_INIT_INO = 0xEFFFFFFEU, 48 + USER_NS_INIT_INO = 0xEFFFFFFDU, 49 + PID_NS_INIT_INO = 0xEFFFFFFCU, 50 + CGROUP_NS_INIT_INO = 0xEFFFFFFBU, 51 + TIME_NS_INIT_INO = 0xEFFFFFFAU, 52 + NET_NS_INIT_INO = 0xEFFFFFF9U, 53 + MNT_NS_INIT_INO = 0xEFFFFFF8U, 54 + }; 55 + 45 56 #endif /* __LINUX_NSFS_H */
+8
net/core/net_namespace.c
··· 796 796 #ifdef CONFIG_NET_NS 797 797 net->ns.ops = &netns_operations; 798 798 #endif 799 + if (net == &init_net) { 800 + net->ns.inum = PROC_NET_INIT_INO; 801 + return 0; 802 + } 799 803 return ns_alloc_inum(&net->ns); 800 804 } 801 805 802 806 static __net_exit void net_ns_net_exit(struct net *net) 803 807 { 808 + /* 809 + * Initial network namespace doesn't exit so we don't need any 810 + * special checks here. 811 + */ 804 812 ns_free_inum(&net->ns); 805 813 } 806 814