Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull pidfs updates from Christian Brauner:
"This contains work to make it possible to derive namespace file
descriptors from pidfd file descriptors.

Right now it is already possible to use a pidfd with setns() to
atomically change multiple namespaces at the same time. In other
words, it is possible to switch to the namespace context of a process
using a pidfd. There is no need to first open namespace file
descriptors via procfs.

The work included here extends these abilities by allowing namespace
file descriptors to be opened using a pidfd. This means it is now
possible to interact with namespaces without ever touching procfs.

To this end, a new set of ioctl() commands on pidfds is introduced, covering all
supported namespace types."

* tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
pidfs: allow retrieval of namespace file descriptors
nsfs: add open_namespace()
nsproxy: add helper to go from arbitrary namespace to ns_common
nsproxy: add a cleanup helper for nsproxy
file: add take_fd() cleanup helper

+179 -30
+2
fs/internal.h
··· 17 17 struct pipe_inode_info; 18 18 struct iov_iter; 19 19 struct mnt_idmap; 20 + struct ns_common; 20 21 21 22 /* 22 23 * block/bdev.c ··· 240 239 * fs/nsfs.c 241 240 */ 242 241 extern const struct dentry_operations ns_dentry_operations; 242 + int open_namespace(struct ns_common *ns); 243 243 244 244 /* 245 245 * fs/stat.c:
+32 -25
fs/nsfs.c
··· 84 84 return ns_get_path_cb(path, ns_get_path_task, &args); 85 85 } 86 86 87 - int open_related_ns(struct ns_common *ns, 88 - struct ns_common *(*get_ns)(struct ns_common *ns)) 87 + /** 88 + * open_namespace - open a namespace 89 + * @ns: the namespace to open 90 + * 91 + * This will consume a reference to @ns independent of success or failure. 92 + * 93 + * Return: A file descriptor on success or a negative error code on failure. 94 + */ 95 + int open_namespace(struct ns_common *ns) 89 96 { 90 - struct path path = {}; 91 - struct ns_common *relative; 97 + struct path path __free(path_put) = {}; 92 98 struct file *f; 93 99 int err; 94 - int fd; 95 100 96 - fd = get_unused_fd_flags(O_CLOEXEC); 101 + /* call first to consume reference */ 102 + err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 103 + if (err < 0) 104 + return err; 105 + 106 + CLASS(get_unused_fd, fd)(O_CLOEXEC); 97 107 if (fd < 0) 98 108 return fd; 99 109 100 - relative = get_ns(ns); 101 - if (IS_ERR(relative)) { 102 - put_unused_fd(fd); 103 - return PTR_ERR(relative); 104 - } 105 - 106 - err = path_from_stashed(&relative->stashed, nsfs_mnt, relative, &path); 107 - if (err < 0) { 108 - put_unused_fd(fd); 109 - return err; 110 - } 111 - 112 110 f = dentry_open(&path, O_RDONLY, current_cred()); 113 - path_put(&path); 114 - if (IS_ERR(f)) { 115 - put_unused_fd(fd); 116 - fd = PTR_ERR(f); 117 - } else 118 - fd_install(fd, f); 111 + if (IS_ERR(f)) 112 + return PTR_ERR(f); 119 113 120 - return fd; 114 + fd_install(fd, f); 115 + return take_fd(fd); 116 + } 117 + 118 + int open_related_ns(struct ns_common *ns, 119 + struct ns_common *(*get_ns)(struct ns_common *ns)) 120 + { 121 + struct ns_common *relative; 122 + 123 + relative = get_ns(ns); 124 + if (IS_ERR(relative)) 125 + return PTR_ERR(relative); 126 + 127 + return open_namespace(relative); 121 128 } 122 129 EXPORT_SYMBOL_GPL(open_related_ns); 123 130
+90
fs/pidfs.c
··· 11 11 #include <linux/proc_fs.h> 12 12 #include <linux/proc_ns.h> 13 13 #include <linux/pseudo_fs.h> 14 + #include <linux/ptrace.h> 14 15 #include <linux/seq_file.h> 15 16 #include <uapi/linux/pidfd.h> 17 + #include <linux/ipc_namespace.h> 18 + #include <linux/time_namespace.h> 19 + #include <linux/utsname.h> 20 + #include <net/net_namespace.h> 16 21 17 22 #include "internal.h" 23 + #include "mount.h" 18 24 19 25 #ifdef CONFIG_PROC_FS 20 26 /** ··· 114 108 return poll_flags; 115 109 } 116 110 111 + static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 112 + { 113 + struct task_struct *task __free(put_task) = NULL; 114 + struct nsproxy *nsp __free(put_nsproxy) = NULL; 115 + struct pid *pid = pidfd_pid(file); 116 + struct ns_common *ns_common; 117 + 118 + if (arg) 119 + return -EINVAL; 120 + 121 + task = get_pid_task(pid, PIDTYPE_PID); 122 + if (!task) 123 + return -ESRCH; 124 + 125 + scoped_guard(task_lock, task) { 126 + nsp = task->nsproxy; 127 + if (nsp) 128 + get_nsproxy(nsp); 129 + } 130 + if (!nsp) 131 + return -ESRCH; /* just pretend it didn't exist */ 132 + 133 + /* 134 + * We're trying to open a file descriptor to the namespace so perform a 135 + * filesystem cred ptrace check. Also, we mirror nsfs behavior. 136 + */ 137 + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 138 + return -EACCES; 139 + 140 + switch (cmd) { 141 + /* Namespaces that hang off nsproxy. 
*/ 142 + case PIDFD_GET_CGROUP_NAMESPACE: 143 + get_cgroup_ns(nsp->cgroup_ns); 144 + ns_common = to_ns_common(nsp->cgroup_ns); 145 + break; 146 + case PIDFD_GET_IPC_NAMESPACE: 147 + get_ipc_ns(nsp->ipc_ns); 148 + ns_common = to_ns_common(nsp->ipc_ns); 149 + break; 150 + case PIDFD_GET_MNT_NAMESPACE: 151 + get_mnt_ns(nsp->mnt_ns); 152 + ns_common = to_ns_common(nsp->mnt_ns); 153 + break; 154 + case PIDFD_GET_NET_NAMESPACE: 155 + ns_common = to_ns_common(nsp->net_ns); 156 + get_net_ns(ns_common); 157 + break; 158 + case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: 159 + get_pid_ns(nsp->pid_ns_for_children); 160 + ns_common = to_ns_common(nsp->pid_ns_for_children); 161 + break; 162 + case PIDFD_GET_TIME_NAMESPACE: 163 + get_time_ns(nsp->time_ns); 164 + ns_common = to_ns_common(nsp->time_ns); 165 + break; 166 + case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: 167 + get_time_ns(nsp->time_ns_for_children); 168 + ns_common = to_ns_common(nsp->time_ns_for_children); 169 + break; 170 + case PIDFD_GET_UTS_NAMESPACE: 171 + get_uts_ns(nsp->uts_ns); 172 + ns_common = to_ns_common(nsp->uts_ns); 173 + break; 174 + /* Namespaces that don't hang off nsproxy. 
*/ 175 + case PIDFD_GET_USER_NAMESPACE: 176 + rcu_read_lock(); 177 + ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); 178 + rcu_read_unlock(); 179 + break; 180 + case PIDFD_GET_PID_NAMESPACE: 181 + rcu_read_lock(); 182 + ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task))); 183 + rcu_read_unlock(); 184 + break; 185 + default: 186 + return -ENOIOCTLCMD; 187 + } 188 + 189 + /* open_namespace() unconditionally consumes the reference */ 190 + return open_namespace(ns_common); 191 + } 192 + 117 193 static const struct file_operations pidfs_file_operations = { 118 194 .poll = pidfd_poll, 119 195 #ifdef CONFIG_PROC_FS 120 196 .show_fdinfo = pidfd_show_fdinfo, 121 197 #endif 198 + .unlocked_ioctl = pidfd_ioctl, 199 + .compat_ioctl = compat_ptr_ioctl, 122 200 }; 123 201 124 202 struct pid *pidfd_pid(const struct file *file)
+8 -5
include/linux/cleanup.h
··· 63 63 64 64 #define __free(_name) __cleanup(__free_##_name) 65 65 66 - #define __get_and_null_ptr(p) \ 67 - ({ __auto_type __ptr = &(p); \ 68 - __auto_type __val = *__ptr; \ 69 - *__ptr = NULL; __val; }) 66 + #define __get_and_null(p, nullvalue) \ 67 + ({ \ 68 + __auto_type __ptr = &(p); \ 69 + __auto_type __val = *__ptr; \ 70 + *__ptr = nullvalue; \ 71 + __val; \ 72 + }) 70 73 71 74 static inline __must_check 72 75 const volatile void * __must_check_fn(const volatile void *val) 73 76 { return val; } 74 77 75 78 #define no_free_ptr(p) \ 76 - ((typeof(p)) __must_check_fn(__get_and_null_ptr(p))) 79 + ((typeof(p)) __must_check_fn(__get_and_null(p, NULL))) 77 80 78 81 #define return_ptr(p) return no_free_ptr(p) 79 82
+20
include/linux/file.h
··· 97 97 DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), 98 98 get_unused_fd_flags(flags), unsigned flags) 99 99 100 + /* 101 + * take_fd() will take care to set @fd to -EBADF ensuring that 102 + * CLASS(get_unused_fd) won't call put_unused_fd(). This makes it 103 + * easier to rely on CLASS(get_unused_fd): 104 + * 105 + * struct file *f; 106 + * 107 + * CLASS(get_unused_fd, fd)(O_CLOEXEC); 108 + * if (fd < 0) 109 + * return fd; 110 + * 111 + * f = dentry_open(&path, O_RDONLY, current_cred()); 112 + * if (IS_ERR(f)) 113 + * return PTR_ERR(f); 114 + * 115 + * fd_install(fd, f); 116 + * return take_fd(fd); 117 + */ 118 + #define take_fd(fd) __get_and_null(fd, -EBADF) 119 + 100 120 extern void fd_install(unsigned int fd, struct file *file); 101 121 102 122 int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);
+13
include/linux/nsproxy.h
··· 42 42 }; 43 43 extern struct nsproxy init_nsproxy; 44 44 45 + #define to_ns_common(__ns) \ 46 + _Generic((__ns), \ 47 + struct cgroup_namespace *: &(__ns->ns), \ 48 + struct ipc_namespace *: &(__ns->ns), \ 49 + struct net *: &(__ns->ns), \ 50 + struct pid_namespace *: &(__ns->ns), \ 51 + struct mnt_namespace *: &(__ns->ns), \ 52 + struct time_namespace *: &(__ns->ns), \ 53 + struct user_namespace *: &(__ns->ns), \ 54 + struct uts_namespace *: &(__ns->ns)) 55 + 45 56 /* 46 57 * A structure to encompass all bits needed to install 47 58 * a partial or complete new set of namespaces. ··· 122 111 { 123 112 refcount_inc(&ns->count); 124 113 } 114 + 115 + DEFINE_FREE(put_nsproxy, struct nsproxy *, if (_T) put_nsproxy(_T)) 125 116 126 117 #endif
+14
include/uapi/linux/pidfd.h
··· 5 5 6 6 #include <linux/types.h> 7 7 #include <linux/fcntl.h> 8 + #include <linux/ioctl.h> 8 9 9 10 /* Flags for pidfd_open(). */ 10 11 #define PIDFD_NONBLOCK O_NONBLOCK ··· 15 14 #define PIDFD_SIGNAL_THREAD (1UL << 0) 16 15 #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) 17 16 #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) 17 + 18 + #define PIDFS_IOCTL_MAGIC 0xFF 19 + 20 + #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) 21 + #define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) 22 + #define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) 23 + #define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) 24 + #define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) 25 + #define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) 26 + #define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) 27 + #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) 28 + #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) 29 + #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) 18 30 19 31 #endif /* _UAPI_LINUX_PIDFD_H */