Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus-20190627' of gitolite.kernel.org:pub/scm/linux/kernel/git/brauner/linux

Pull pidfd fixes from Christian Brauner:
"Userspace tools and libraries such as strace or glibc need a cheap and
reliable way to tell whether CLONE_PIDFD is supported. The easiest way
is to pass an invalid fd value in the return argument, perform the
syscall and verify the value in the return argument has been changed
to a valid fd.

However, if CLONE_PIDFD is specified we currently check if pidfd == 0
and return EINVAL if not.

The check for pidfd == 0 was originally added to enable us to abuse
the return argument for passing additional flags along with
CLONE_PIDFD in the future.

However, extending legacy clone this way would be a terrible idea and
with clone3 on the horizon and the ability to reuse CLONE_DETACHED
with CLONE_PIDFD there's no real need for this crutch. So remove the
pidfd == 0 check and help userspace out.

Also, according to Al, anon_inode_getfd() should only be used past the
point of no failure and ksys_close() should not be used at all since
it is far too easy to get wrong. Al's motto being "basically, once
it's in descriptor table, it's out of your control". So Al's patch
switches back to what we already had in v1 of the original patchset
and uses an anon_inode_getfile() + put_user() + fd_install() sequence
in the success path and a fput() + put_unused_fd() in the failure
path.

The other two changes should be trivial"

* tag 'for-linus-20190627' of gitolite.kernel.org:pub/scm/linux/kernel/git/brauner/linux:
proc: remove useless d_is_dir() check
copy_process(): don't use ksys_close() on cleanups
samples: make pidfd-metadata fail gracefully on older kernels
fork: don't check parent_tidptr with CLONE_PIDFD

+25 -44
+1 -2
fs/proc/base.c
··· 3077 3077 3078 3078 struct pid *tgid_pidfd_to_pid(const struct file *file) 3079 3079 { 3080 - if (!d_is_dir(file->f_path.dentry) || 3081 - (file->f_op != &proc_tgid_base_operations)) 3080 + if (file->f_op != &proc_tgid_base_operations) 3082 3081 return ERR_PTR(-EBADF); 3083 3082 3084 3083 return proc_pid(file_inode(file));
+18 -40
kernel/fork.c
··· 1712 1712 #endif 1713 1713 }; 1714 1714 1715 - /** 1716 - * pidfd_create() - Create a new pid file descriptor. 1717 - * 1718 - * @pid: struct pid that the pidfd will reference 1719 - * 1720 - * This creates a new pid file descriptor with the O_CLOEXEC flag set. 1721 - * 1722 - * Note, that this function can only be called after the fd table has 1723 - * been unshared to avoid leaking the pidfd to the new process. 1724 - * 1725 - * Return: On success, a cloexec pidfd is returned. 1726 - * On error, a negative errno number will be returned. 1727 - */ 1728 - static int pidfd_create(struct pid *pid) 1729 - { 1730 - int fd; 1731 - 1732 - fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), 1733 - O_RDWR | O_CLOEXEC); 1734 - if (fd < 0) 1735 - put_pid(pid); 1736 - 1737 - return fd; 1738 - } 1739 - 1740 1715 static void __delayed_free_task(struct rcu_head *rhp) 1741 1716 { 1742 1717 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); ··· 1749 1774 int pidfd = -1, retval; 1750 1775 struct task_struct *p; 1751 1776 struct multiprocess_signals delayed; 1777 + struct file *pidfile = NULL; 1752 1778 1753 1779 /* 1754 1780 * Don't allow sharing the root directory with processes in a different ··· 1798 1822 } 1799 1823 1800 1824 if (clone_flags & CLONE_PIDFD) { 1801 - int reserved; 1802 - 1803 1825 /* 1804 1826 * - CLONE_PARENT_SETTID is useless for pidfds and also 1805 1827 * parent_tidptr is used to return pidfds. ··· 1807 1833 */ 1808 1834 if (clone_flags & 1809 1835 (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) 1810 - return ERR_PTR(-EINVAL); 1811 - 1812 - /* 1813 - * Verify that parent_tidptr is sane so we can potentially 1814 - * reuse it later. 1815 - */ 1816 - if (get_user(reserved, parent_tidptr)) 1817 - return ERR_PTR(-EFAULT); 1818 - 1819 - if (reserved != 0) 1820 1836 return ERR_PTR(-EINVAL); 1821 1837 } 1822 1838 ··· 2022 2058 * if the fd table isn't shared). 
2023 2059 */ 2024 2060 if (clone_flags & CLONE_PIDFD) { 2025 - retval = pidfd_create(pid); 2061 + retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC); 2026 2062 if (retval < 0) 2027 2063 goto bad_fork_free_pid; 2028 2064 2029 2065 pidfd = retval; 2066 + 2067 + pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid, 2068 + O_RDWR | O_CLOEXEC); 2069 + if (IS_ERR(pidfile)) { 2070 + put_unused_fd(pidfd); 2071 + goto bad_fork_free_pid; 2072 + } 2073 + get_pid(pid); /* held by pidfile now */ 2074 + 2030 2075 retval = put_user(pidfd, parent_tidptr); 2031 2076 if (retval) 2032 2077 goto bad_fork_put_pidfd; ··· 2153 2180 goto bad_fork_cancel_cgroup; 2154 2181 } 2155 2182 2183 + /* past the last point of failure */ 2184 + if (pidfile) 2185 + fd_install(pidfd, pidfile); 2156 2186 2157 2187 init_task_pid_links(p); 2158 2188 if (likely(p->pid)) { ··· 2222 2246 bad_fork_cgroup_threadgroup_change_end: 2223 2247 cgroup_threadgroup_change_end(current); 2224 2248 bad_fork_put_pidfd: 2225 - if (clone_flags & CLONE_PIDFD) 2226 - ksys_close(pidfd); 2249 + if (clone_flags & CLONE_PIDFD) { 2250 + fput(pidfile); 2251 + put_unused_fd(pidfd); 2252 + } 2227 2253 bad_fork_free_pid: 2228 2254 if (pid != &init_struct_pid) 2229 2255 free_pid(pid);
+6 -2
samples/pidfd/pidfd-metadata.c
··· 83 83 84 84 int main(int argc, char *argv[]) 85 85 { 86 - int pidfd = 0, ret = EXIT_FAILURE; 86 + int pidfd = -1, ret = EXIT_FAILURE; 87 87 char buf[4096] = { 0 }; 88 88 pid_t pid; 89 89 int procfd, statusfd; ··· 91 91 92 92 pid = pidfd_clone(CLONE_PIDFD, &pidfd); 93 93 if (pid < 0) 94 - exit(ret); 94 + err(ret, "CLONE_PIDFD"); 95 + if (pidfd == -1) { 96 + warnx("CLONE_PIDFD is not supported by the kernel"); 97 + goto out; 98 + } 95 99 96 100 procfd = pidfd_metadata_fd(pid, pidfd); 97 101 close(pidfd);