Merge tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull mount fixes from Al Viro:
"Various mount-related bugfixes:

- split the do_move_mount() checks in subtree-of-our-ns and
entire-anon cases and adapt detached mount propagation selftest for
mount_setattr

- allow clone_private_mount() for a path on real rootfs

- fix a race in call of has_locked_children()

- fix move_mount propagation graph breakage by MOVE_MOUNT_SET_GROUP

- make sure clone_private_mount() caller has CAP_SYS_ADMIN in the right
userns

- avoid false negatives in path_overmounted()

- don't leak MNT_LOCKED from parent to child in finish_automount()

- do_change_type(): refuse to operate on unmounted/not ours mounts"

* tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
do_change_type(): refuse to operate on unmounted/not ours mounts
clone_private_mnt(): make sure that caller has CAP_SYS_ADMIN in the right userns
selftests/mount_setattr: adapt detached mount propagation test
do_move_mount(): split the checks in subtree-of-our-ns and entire-anon cases
fs: allow clone_private_mount() for a path on real rootfs
fix propagation graph breakage by MOVE_MOUNT_SET_GROUP move_mount(2)
finish_automount(): don't leak MNT_LOCKED from parent to child
path_overmount(): avoid false negatives
fs/fhandle.c: fix a race in call of has_locked_children()

Linus Torvalds 11 months ago 35b574a6 522cd6ac

+74 -59

3 changed files

expand all

namespace.c

include

linux

mount.h

tools

testing

selftests

mount_setattr

mount_setattr_test.c

+71 -42

fs/namespace.c

··· 2410 2410 namespace_unlock(); 2411 2411 } 2412 2412 2413 - bool has_locked_children(struct mount *mnt, struct dentry *dentry) 2413 + static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) 2414 2414 { 2415 2415 struct mount *child; 2416 2416 ··· 2422 2422 return true; 2423 2423 } 2424 2424 return false; 2425 + } 2426 + 2427 + bool has_locked_children(struct mount *mnt, struct dentry *dentry) 2428 + { 2429 + bool res; 2430 + 2431 + read_seqlock_excl(&mount_lock); 2432 + res = __has_locked_children(mnt, dentry); 2433 + read_sequnlock_excl(&mount_lock); 2434 + return res; 2425 2435 } 2426 2436 2427 2437 /* ··· 2478 2468 if (IS_MNT_UNBINDABLE(old_mnt)) 2479 2469 return ERR_PTR(-EINVAL); 2480 2470 2481 - if (mnt_has_parent(old_mnt)) { 2482 - if (!check_mnt(old_mnt)) 2483 - return ERR_PTR(-EINVAL); 2484 - } else { 2485 - if (!is_mounted(&old_mnt->mnt)) 2471 + /* 2472 + * Make sure the source mount is acceptable. 2473 + * Anything mounted in our mount namespace is allowed. 2474 + * Otherwise, it must be the root of an anonymous mount 2475 + * namespace, and we need to make sure no namespace 2476 + * loops get created. 2477 + */ 2478 + if (!check_mnt(old_mnt)) { 2479 + if (!is_mounted(&old_mnt->mnt) || 2480 + !is_anon_ns(old_mnt->mnt_ns) || 2481 + mnt_has_parent(old_mnt)) 2486 2482 return ERR_PTR(-EINVAL); 2487 2483 2488 - /* Make sure this isn't something purely kernel internal. */ 2489 - if (!is_anon_ns(old_mnt->mnt_ns)) 2490 - return ERR_PTR(-EINVAL); 2491 - 2492 - /* Make sure we don't create mount namespace loops. 
*/ 2493 2484 if (!check_for_nsfs_mounts(old_mnt)) 2494 2485 return ERR_PTR(-EINVAL); 2495 2486 } 2496 2487 2497 - if (has_locked_children(old_mnt, path->dentry)) 2488 + if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) 2489 + return ERR_PTR(-EPERM); 2490 + 2491 + if (__has_locked_children(old_mnt, path->dentry)) 2498 2492 return ERR_PTR(-EINVAL); 2499 2493 2500 2494 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); ··· 2944 2930 return -EINVAL; 2945 2931 2946 2932 namespace_lock(); 2933 + if (!check_mnt(mnt)) { 2934 + err = -EINVAL; 2935 + goto out_unlock; 2936 + } 2947 2937 if (type == MS_SHARED) { 2948 2938 err = invent_group_ids(mnt, recurse); 2949 2939 if (err) ··· 3039 3021 if (!may_copy_tree(old_path)) 3040 3022 return mnt; 3041 3023 3042 - if (!recurse && has_locked_children(old, old_path->dentry)) 3024 + if (!recurse && __has_locked_children(old, old_path->dentry)) 3043 3025 return mnt; 3044 3026 3045 3027 if (recurse) ··· 3432 3414 goto out; 3433 3415 3434 3416 /* From mount should not have locked children in place of To's root */ 3435 - if (has_locked_children(from, to->mnt.mnt_root)) 3417 + if (__has_locked_children(from, to->mnt.mnt_root)) 3436 3418 goto out; 3437 3419 3438 3420 /* Setting sharing groups is only allowed on private mounts */ ··· 3446 3428 if (IS_MNT_SLAVE(from)) { 3447 3429 struct mount *m = from->mnt_master; 3448 3430 3449 - list_add(&to->mnt_slave, &m->mnt_slave_list); 3431 + list_add(&to->mnt_slave, &from->mnt_slave); 3450 3432 to->mnt_master = m; 3451 3433 } 3452 3434 ··· 3471 3453 * Check if path is overmounted, i.e., if there's a mount on top of 3472 3454 * @path->mnt with @path->dentry as mountpoint. 3473 3455 * 3474 - * Context: This function expects namespace_lock() to be held. 3456 + * Context: namespace_sem must be held at least shared. 3457 + * MUST NOT be called under lock_mount_hash() (there one should just 3458 + * call __lookup_mnt() and check if it returns NULL). 
3475 3459 * Return: If path is overmounted true is returned, false if not. 3476 3460 */ 3477 3461 static inline bool path_overmounted(const struct path *path) 3478 3462 { 3463 + unsigned seq = read_seqbegin(&mount_lock); 3464 + bool no_child; 3465 + 3479 3466 rcu_read_lock(); 3480 - if (unlikely(__lookup_mnt(path->mnt, path->dentry))) { 3481 - rcu_read_unlock(); 3482 - return true; 3483 - } 3467 + no_child = !__lookup_mnt(path->mnt, path->dentry); 3484 3468 rcu_read_unlock(); 3485 - return false; 3469 + if (need_seqretry(&mount_lock, seq)) { 3470 + read_seqlock_excl(&mount_lock); 3471 + no_child = !__lookup_mnt(path->mnt, path->dentry); 3472 + read_sequnlock_excl(&mount_lock); 3473 + } 3474 + return unlikely(!no_child); 3486 3475 } 3487 3476 3488 3477 /** ··· 3648 3623 ns = old->mnt_ns; 3649 3624 3650 3625 err = -EINVAL; 3651 - if (!may_use_mount(p)) 3652 - goto out; 3653 - 3654 3626 /* The thing moved must be mounted... */ 3655 3627 if (!is_mounted(&old->mnt)) 3656 3628 goto out; 3657 3629 3658 - /* ... and either ours or the root of anon namespace */ 3659 - if (!(attached ? check_mnt(old) : is_anon_ns(ns))) 3660 - goto out; 3661 - 3662 - if (is_anon_ns(ns) && ns == p->mnt_ns) { 3630 + if (check_mnt(old)) { 3631 + /* if the source is in our namespace... */ 3632 + /* ... it should be detachable from parent */ 3633 + if (!mnt_has_parent(old) || IS_MNT_LOCKED(old)) 3634 + goto out; 3635 + /* ... and the target should be in our namespace */ 3636 + if (!check_mnt(p)) 3637 + goto out; 3638 + } else { 3663 3639 /* 3664 - * Ending up with two files referring to the root of the 3665 - * same anonymous mount namespace would cause an error 3666 - * as this would mean trying to move the same mount 3667 - * twice into the mount tree which would be rejected 3668 - * later. But be explicit about it right here. 3640 + * otherwise the source must be the root of some anon namespace. 
3641 + * AV: check for mount being root of an anon namespace is worth 3642 + * an inlined predicate... 3669 3643 */ 3670 - goto out; 3671 - } else if (is_anon_ns(p->mnt_ns)) { 3644 + if (!is_anon_ns(ns) || mnt_has_parent(old)) 3645 + goto out; 3672 3646 /* 3673 - * Don't allow moving an attached mount tree to an 3674 - * anonymous mount tree. 3647 + * Bail out early if the target is within the same namespace - 3648 + * subsequent checks would've rejected that, but they lose 3649 + * some corner cases if we check it early. 3675 3650 */ 3676 - goto out; 3651 + if (ns == p->mnt_ns) 3652 + goto out; 3653 + /* 3654 + * Target should be either in our namespace or in an acceptable 3655 + * anon namespace, sensu check_anonymous_mnt(). 3656 + */ 3657 + if (!may_use_mount(p)) 3658 + goto out; 3677 3659 } 3678 - 3679 - if (old->mnt.mnt_flags & MNT_LOCKED) 3680 - goto out; 3681 3660 3682 3661 if (!path_mounted(old_path)) 3683 3662 goto out;

+2 -1

include/linux/mount.h

··· 65 65 MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, 66 66 67 67 MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | 68 - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED, 68 + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | 69 + MNT_LOCKED, 69 70 }; 70 71 71 72 struct vfsmount {

+1 -16

tools/testing/selftests/mount_setattr/mount_setattr_test.c

··· 2079 2079 * means that the device information will be different for any 2080 2080 * statx() that was taken from /mnt/A before the mount compared 2081 2081 * to one after the mount. 2082 - * 2083 - * Since we already now that the device information between the 2084 - * stx1 and stx2 samples are identical we also now that stx2 and 2085 - * stx3 device information will necessarily differ. 2086 2082 */ 2087 2083 ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor); 2088 - 2089 - /* 2090 - * If mount propagation worked correctly then the tmpfs mount 2091 - * that was created after the mount namespace was unshared will 2092 - * have propagated onto /mnt/A in the detached mount tree. 2093 - * 2094 - * Verify that the device information for stx3 and stx4 are 2095 - * identical. It is already established that stx3 is different 2096 - * from both stx1 and stx2 sampled before the tmpfs mount was 2097 - * done so if stx3 and stx4 are identical the proof is done. 2098 - */ 2099 - ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor); 2084 + ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor); 2100 2085 2101 2086 EXPECT_EQ(close(fd_tree), 0); 2102 2087 }

Configure Feed

Configure Feed