Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'pull-mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs mount updates from Al Viro:
"Several piles this cycle, this mount-related one being the largest and
trickiest:

- saner handling of guards in fs/namespace.c, getting rid of
needlessly strong locking in some of the users (a sketch of the
guard pattern follows this list)

- lock_mount() calling conventions change - have it set the
environment for attaching at a given location, storing the results
in a caller-supplied object without altering the passed struct path.

unlock_mount() is now called as __cleanup for those objects. It's
not exactly guard(), but similar to it

- MNT_WRITE_HOLD done right.

mnt_hold_writers() does *not* mess with ->mnt_flags anymore, so
insertion of a new mount into ->s_mounts of the underlying superblock
does not, in itself, expose ->mnt_flags of that mount to concurrent
modifications

- getting rid of pathological cases where umount() spends quadratic
time removing the victims from the propagation graph - part of that
had been dealt with last cycle; this should finish it

- a bunch of stuff constified

- assorted cleanups
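The guards mentioned in the first bullet are the scope-based cleanup
helpers from include/linux/cleanup.h (DEFINE_LOCK_GUARD_0(), guard(),
scoped_guard()), which the hunks below instantiate as
mount_locked_reader, namespace_excl and friends. As a rough userspace
illustration of the underlying mechanism - a minimal sketch built on
the same __attribute__((cleanup)) compiler feature, not the kernel
macros themselves:

	#include <pthread.h>

	static pthread_mutex_t mount_lock = PTHREAD_MUTEX_INITIALIZER;

	struct lock_guard { pthread_mutex_t *lock; };

	static inline struct lock_guard guard_ctor(pthread_mutex_t *l)
	{
		pthread_mutex_lock(l);
		return (struct lock_guard){ .lock = l };
	}

	static inline void guard_dtor(struct lock_guard *g)
	{
		pthread_mutex_unlock(g->lock);
	}

	/* lock for the rest of the enclosing scope; auto-unlocked on exit */
	#define guard_scope(l) \
		struct lock_guard __g __attribute__((cleanup(guard_dtor))) = \
			guard_ctor(l)

	static int walk_mounts(void)
	{
		guard_scope(&mount_lock);
		/* every return path below unlocks automatically */
		return 0;
	}

Every early return in functions like do_change_type() or do_set_group()
can then drop the "goto out_unlock" boilerplate, which is exactly what
the fs/namespace.c hunks below do.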

* tag 'pull-mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (64 commits)
constify {__,}mnt_is_readonly()
WRITE_HOLD machinery: no need to bump mount_lock seqcount
struct mount: relocate MNT_WRITE_HOLD bit
preparations to taking MNT_WRITE_HOLD out of ->mnt_flags
setup_mnt(): primitive for connecting a mount to filesystem
simplify the callers of mnt_unhold_writers()
copy_mnt_ns(): use guards
copy_mnt_ns(): use the regular mechanism for freeing empty mnt_ns on failure
open_detached_copy(): separate creation of namespace into helper
open_detached_copy(): don't bother with mount_lock_hash()
path_has_submounts(): use guard(mount_locked_reader)
fs/namespace.c: sanitize descriptions for {__,}lookup_mnt()
ecryptfs: get rid of pointless mount references in ecryptfs dentries
umount_tree(): take all victims out of propagation graph at once
do_mount(): use __free(path_put)
do_move_mount_old(): use __free(path_put)
constify can_move_mount_beneath() arguments
path_umount(): constify struct path argument
may_copy_tree(), __do_loopback(): constify struct path argument
path_mount(): constify struct path argument
...

+610 -652
+2 -2
fs/dcache.c
@@ -1390 +1390 @@
 	unsigned int mounted;
 };
 
+/* locks: mount_locked_reader && dentry->d_lock */
 static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry)
 {
 	struct check_mount *info = data;
@@ -1417 +1416 @@
 {
 	struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
 
-	read_seqlock_excl(&mount_lock);
+	guard(mount_locked_reader)();
 	d_walk(parent->dentry, &data, path_check_mount);
-	read_sequnlock_excl(&mount_lock);
 
 	return data.mounted;
 }
+1 -13
fs/ecryptfs/dentry.c
@@ -59 +59 @@
 	return rc;
 }
 
-struct kmem_cache *ecryptfs_dentry_info_cache;
-
-static void ecryptfs_dentry_free_rcu(struct rcu_head *head)
-{
-	kmem_cache_free(ecryptfs_dentry_info_cache,
-			container_of(head, struct ecryptfs_dentry_info, rcu));
-}
-
 /**
  * ecryptfs_d_release
  * @dentry: The ecryptfs dentry
@@ -67 +75 @@
  */
 static void ecryptfs_d_release(struct dentry *dentry)
 {
-	struct ecryptfs_dentry_info *p = dentry->d_fsdata;
-	if (p) {
-		path_put(&p->lower_path);
-		call_rcu(&p->rcu, ecryptfs_dentry_free_rcu);
-	}
+	dput(dentry->d_fsdata);
 }
 
 const struct dentry_operations ecryptfs_dops = {
+11 -16
fs/ecryptfs/ecryptfs_kernel.h
@@ -258 +258 @@
 	struct ecryptfs_crypt_stat crypt_stat;
 };
 
-/* dentry private data. Each dentry must keep track of a lower
- * vfsmount too. */
-struct ecryptfs_dentry_info {
-	struct path lower_path;
-	struct rcu_head rcu;
-};
-
 /**
  * ecryptfs_global_auth_tok - A key used to encrypt all new files under the mountpoint
  * @flags: Status flags
@@ -341 +348 @@
 /* superblock private data. */
 struct ecryptfs_sb_info {
 	struct super_block *wsi_sb;
+	struct vfsmount *lower_mnt;
 	struct ecryptfs_mount_crypt_stat mount_crypt_stat;
 };
 
@@ -488 +494 @@
 }
 
 static inline void
-ecryptfs_set_dentry_private(struct dentry *dentry,
-			    struct ecryptfs_dentry_info *dentry_info)
+ecryptfs_set_dentry_lower(struct dentry *dentry,
+			  struct dentry *lower_dentry)
 {
-	dentry->d_fsdata = dentry_info;
+	dentry->d_fsdata = lower_dentry;
 }
 
 static inline struct dentry *
 ecryptfs_dentry_to_lower(struct dentry *dentry)
 {
-	return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry;
+	return dentry->d_fsdata;
 }
 
-static inline const struct path *
-ecryptfs_dentry_to_lower_path(struct dentry *dentry)
+static inline struct path
+ecryptfs_lower_path(struct dentry *dentry)
 {
-	return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
+	return (struct path){
+		.mnt = ecryptfs_superblock_to_private(dentry->d_sb)->lower_mnt,
+		.dentry = ecryptfs_dentry_to_lower(dentry)
+	};
 }
 
 #define ecryptfs_printk(type, fmt, arg...) \
@@ -529 +532 @@
 
 extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
 extern struct kmem_cache *ecryptfs_file_info_cache;
-extern struct kmem_cache *ecryptfs_dentry_info_cache;
 extern struct kmem_cache *ecryptfs_inode_info_cache;
 extern struct kmem_cache *ecryptfs_sb_info_cache;
 extern struct kmem_cache *ecryptfs_header_cache;
@@ -553 +557 @@
 				  size_t *encoded_name_size,
 				  struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
 				  const char *name, size_t name_size);
-struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
 void ecryptfs_dump_hex(char *data, int bytes);
 int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
 			int sg_size);
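The structural point of this hunk: every dentry on a given ecryptfs
superblock sits on the same lower vfsmount, so the mount half of the
old per-dentry lower_path can live once in ecryptfs_sb_info, and a
full struct path can be assembled on demand, by value. A sketch of the
idea with stand-in types (illustration only, not the kernel
definitions):

	struct vfsmount;			/* opaque here */
	struct dentry;				/* opaque here */
	struct path { struct vfsmount *mnt; struct dentry *dentry; };

	/* one reference on the lower mount, owned by the superblock ... */
	struct sb_private { struct vfsmount *lower_mnt; };

	/* ... plus one lower-dentry reference per upper dentry ... */
	struct dentry_private { struct dentry *lower_dentry; };

	/* ... is enough to rebuild the lower path whenever needed */
	static inline struct path lower_path(const struct sb_private *sbi,
					     const struct dentry_private *di)
	{
		return (struct path){
			.mnt = sbi->lower_mnt,
			.dentry = di->lower_dentry,
		};
	}

That is why ecryptfs_d_release() shrinks to a single dput() in the
hunk above, and why kill_sb() gains the matching mntput() in the
fs/ecryptfs/main.c hunk below: the per-dentry mntget()/path_put()
pairs and the RCU-freed ecryptfs_dentry_info go away entirely.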
+7 -8
fs/ecryptfs/file.c
@@ -33 +33 @@
 				struct iov_iter *to)
 {
 	ssize_t rc;
-	const struct path *path;
 	struct file *file = iocb->ki_filp;
 
 	rc = generic_file_read_iter(iocb, to);
 	if (rc >= 0) {
-		path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
-		touch_atime(path);
+		struct path path = ecryptfs_lower_path(file->f_path.dentry);
+		touch_atime(&path);
 	}
 	return rc;
 }
@@ -58 +59 @@
 				size_t len, unsigned int flags)
 {
 	ssize_t rc;
-	const struct path *path;
 
 	rc = filemap_splice_read(in, ppos, pipe, len, flags);
 	if (rc >= 0) {
-		path = ecryptfs_dentry_to_lower_path(in->f_path.dentry);
-		touch_atime(path);
+		struct path path = ecryptfs_lower_path(in->f_path.dentry);
+		touch_atime(&path);
 	}
 	return rc;
 }
@@ -281 +283 @@
 	 * ecryptfs_lookup() */
 	struct ecryptfs_file_info *file_info;
 	struct file *lower_file;
+	struct path path;
 
 	/* Released in ecryptfs_release or end of function if failure */
 	file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
@@ -291 +292 @@
 			"Error attempting to allocate memory\n");
 		return -ENOMEM;
 	}
-	lower_file = dentry_open(ecryptfs_dentry_to_lower_path(ecryptfs_dentry),
-				 file->f_flags, current_cred());
+	path = ecryptfs_lower_path(ecryptfs_dentry);
+	lower_file = dentry_open(&path, file->f_flags, current_cred());
 	if (IS_ERR(lower_file)) {
 		printk(KERN_ERR "%s: Error attempting to initialize "
 			"the lower file for the dentry with name "
+5 -14
fs/ecryptfs/inode.c
@@ -327 +327 @@
 static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
 				     struct dentry *lower_dentry)
 {
-	const struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent);
+	struct dentry *lower_parent = ecryptfs_dentry_to_lower(dentry->d_parent);
 	struct inode *inode, *lower_inode;
-	struct ecryptfs_dentry_info *dentry_info;
 	int rc = 0;
 
-	dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
-	if (!dentry_info) {
-		dput(lower_dentry);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	fsstack_copy_attr_atime(d_inode(dentry->d_parent),
-				d_inode(path->dentry));
+				d_inode(lower_parent));
 	BUG_ON(!d_count(lower_dentry));
 
-	ecryptfs_set_dentry_private(dentry, dentry_info);
-	dentry_info->lower_path.mnt = mntget(path->mnt);
-	dentry_info->lower_path.dentry = lower_dentry;
+	ecryptfs_set_dentry_lower(dentry, lower_dentry);
 
 	/*
 	 * negative dentry can go positive under us here - its parent is not
@@ -1012 +1021 @@
 {
 	struct dentry *dentry = path->dentry;
 	struct kstat lower_stat;
+	struct path lower_path = ecryptfs_lower_path(dentry);
 	int rc;
 
-	rc = vfs_getattr_nosec(ecryptfs_dentry_to_lower_path(dentry),
-			       &lower_stat, request_mask, flags);
+	rc = vfs_getattr_nosec(&lower_path, &lower_stat, request_mask, flags);
 	if (!rc) {
 		fsstack_copy_attr_all(d_inode(dentry),
 				      ecryptfs_inode_to_lower(d_inode(dentry)));
+6 -18
fs/ecryptfs/main.c
@@ -106 +106 @@
 			     struct file **lower_file)
 {
 	const struct cred *cred = current_cred();
-	const struct path *path = ecryptfs_dentry_to_lower_path(dentry);
+	struct path path = ecryptfs_lower_path(dentry);
 	int rc;
 
-	rc = ecryptfs_privileged_open(lower_file, path->dentry, path->mnt,
-				      cred);
+	rc = ecryptfs_privileged_open(lower_file, path.dentry, path.mnt, cred);
 	if (rc) {
 		printk(KERN_ERR "Error opening lower file "
 		       "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
-		       "rc = [%d]\n", path->dentry, path->mnt, rc);
+		       "rc = [%d]\n", path.dentry, path.mnt, rc);
 		(*lower_file) = NULL;
 	}
 	return rc;
@@ -436 +437 @@
 	struct ecryptfs_fs_context *ctx = fc->fs_private;
 	struct ecryptfs_sb_info *sbi = fc->s_fs_info;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
-	struct ecryptfs_dentry_info *root_info;
 	const char *err = "Getting sb failed";
 	struct inode *inode;
 	struct path path;
@@ -541 +543 @@
 		goto out_free;
 	}
 
-	rc = -ENOMEM;
-	root_info = kmem_cache_zalloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
-	if (!root_info)
-		goto out_free;
-
-	/* ->kill_sb() will take care of root_info */
-	ecryptfs_set_dentry_private(s->s_root, root_info);
-	root_info->lower_path = path;
+	ecryptfs_set_dentry_lower(s->s_root, path.dentry);
+	ecryptfs_superblock_to_private(s)->lower_mnt = path.mnt;
 
 	s->s_flags |= SB_ACTIVE;
 	fc->root = dget(s->s_root);
@@ -572 +580 @@
 	kill_anon_super(sb);
 	if (!sb_info)
 		return;
+	mntput(sb_info->lower_mnt);
 	ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
 	kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
 }
@@ -659 +666 @@
 		.cache = &ecryptfs_file_info_cache,
 		.name = "ecryptfs_file_cache",
 		.size = sizeof(struct ecryptfs_file_info),
-	},
-	{
-		.cache = &ecryptfs_dentry_info_cache,
-		.name = "ecryptfs_dentry_info_cache",
-		.size = sizeof(struct ecryptfs_dentry_info),
 	},
 	{
 		.cache = &ecryptfs_inode_info_cache,
+2 -2
fs/internal.h
@@ -84 +84 @@
 extern void dissolve_on_fput(struct vfsmount *);
 extern bool may_mount(void);
 
-int path_mount(const char *dev_name, struct path *path,
+int path_mount(const char *dev_name, const struct path *path,
 		const char *type_page, unsigned long flags, void *data_page);
-int path_umount(struct path *path, int flags);
+int path_umount(const struct path *path, int flags);
 
 int show_path(struct seq_file *m, struct dentry *root);
+38 -1
fs/mount.h
@@ -58 +58 @@
 #endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
-	struct list_head mnt_instance;	/* mount instance on sb->s_mounts */
+	struct mount *mnt_next_for_sb;	/* the next two fields are hlist_node, */
+	struct mount * __aligned(1) *mnt_pprev_for_sb;
+					/* except that LSB of pprev is stolen */
+#define WRITE_HOLD 1			/* ... for use by mnt_hold_writers() */
 	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
 	struct list_head mnt_expire;	/* link in fs-specific expiry list */
@@ -151 +148 @@
 
 extern seqlock_t mount_lock;
 
+DEFINE_LOCK_GUARD_0(mount_writer, write_seqlock(&mount_lock),
+		    write_sequnlock(&mount_lock))
+DEFINE_LOCK_GUARD_0(mount_locked_reader, read_seqlock_excl(&mount_lock),
+		    read_sequnlock_excl(&mount_lock))
+
 struct proc_mounts {
 	struct mnt_namespace *ns;
 	struct path root;
@@ -231 +223 @@
 {
 }
 #endif
+
+static inline struct mount *topmost_overmount(struct mount *m)
+{
+	while (m->overmount)
+		m = m->overmount;
+	return m;
+}
+
+static inline bool __test_write_hold(struct mount * __aligned(1) *val)
+{
+	return (unsigned long)val & WRITE_HOLD;
+}
+
+static inline bool test_write_hold(const struct mount *m)
+{
+	return __test_write_hold(m->mnt_pprev_for_sb);
+}
+
+static inline void set_write_hold(struct mount *m)
+{
+	m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb
+				       | WRITE_HOLD);
+}
+
+static inline void clear_write_hold(struct mount *m)
+{
+	m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb
+				       & ~WRITE_HOLD);
+}
 
 struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
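The trick in the hunk above: hlist-style back-pointers always point at
word-aligned struct mount fields, so bit 0 of mnt_pprev_for_sb is
guaranteed to be zero and can carry the WRITE_HOLD flag without
touching ->mnt_flags. A self-contained illustration of the tagging
scheme (plain C, not the kernel code):

	#include <assert.h>
	#include <stdint.h>
	#include <stdbool.h>

	#define WRITE_HOLD 1UL	/* stolen low bit of an aligned pointer */

	static inline bool test_hold(void *pprev)
	{
		return (uintptr_t)pprev & WRITE_HOLD;
	}

	static inline void *set_hold(void *pprev)
	{
		return (void *)((uintptr_t)pprev | WRITE_HOLD);
	}

	static inline void *clear_hold(void *pprev)
	{
		return (void *)((uintptr_t)pprev & ~WRITE_HOLD);
	}

	int main(void)
	{
		long slot;		/* stands in for sb->s_mounts */
		void *p = &slot;	/* aligned, so bit 0 is free */

		assert(!test_hold(p));
		p = set_hold(p);
		assert(test_hold(p));
		assert(clear_hold(p) == (void *)&slot);
		return 0;
	}

The __test_write_hold()/set_write_hold()/clear_write_hold() helpers
above keep all of the tag handling in one place.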
+467 -545
fs/namespace.c
@@ -91 +91 @@
 static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
 static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */
 
+static inline void namespace_lock(void);
+static void namespace_unlock(void);
+DEFINE_LOCK_GUARD_0(namespace_excl, namespace_lock(), namespace_unlock())
+DEFINE_LOCK_GUARD_0(namespace_shared, down_read(&namespace_sem),
+		    up_read(&namespace_sem))
+
+DEFINE_FREE(mntput, struct vfsmount *, if (!IS_ERR(_T)) mntput(_T))
+
 #ifdef CONFIG_FSNOTIFY
 LIST_HEAD(notify_list); /* protected by namespace_sem */
 #endif
@@ -371 +363 @@
  * mnt_want/drop_write() will _keep_ the filesystem
  * r/w.
  */
-bool __mnt_is_readonly(struct vfsmount *mnt)
+bool __mnt_is_readonly(const struct vfsmount *mnt)
 {
 	return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
 }
@@ -411 +403 @@
 #endif
 }
 
-static int mnt_is_readonly(struct vfsmount *mnt)
+static int mnt_is_readonly(const struct vfsmount *mnt)
 {
 	if (READ_ONCE(mnt->mnt_sb->s_readonly_remount))
 		return 1;
@@ -452 +444 @@
 	mnt_inc_writers(mnt);
 	/*
 	 * The store to mnt_inc_writers must be visible before we pass
-	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
-	 * incremented count after it has set MNT_WRITE_HOLD.
+	 * WRITE_HOLD loop below, so that the slowpath can see our
+	 * incremented count after it has set WRITE_HOLD.
 	 */
 	smp_mb();
 	might_lock(&mount_lock.lock);
-	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+	while (__test_write_hold(READ_ONCE(mnt->mnt_pprev_for_sb))) {
 		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
 			cpu_relax();
 		} else {
 			/*
 			 * This prevents priority inversion, if the task
-			 * setting MNT_WRITE_HOLD got preempted on a remote
+			 * setting WRITE_HOLD got preempted on a remote
 			 * CPU, and it prevents life lock if the task setting
-			 * MNT_WRITE_HOLD has a lower priority and is bound to
+			 * WRITE_HOLD has a lower priority and is bound to
 			 * the same CPU as the task that is spinning here.
 			 */
 			preempt_enable();
-			lock_mount_hash();
-			unlock_mount_hash();
+			read_seqlock_excl(&mount_lock);
+			read_sequnlock_excl(&mount_lock);
 			preempt_disable();
 		}
 	}
 	/*
 	 * The barrier pairs with the barrier sb_start_ro_state_change() making
-	 * sure that if we see MNT_WRITE_HOLD cleared, we will also see
+	 * sure that if we see WRITE_HOLD cleared, we will also see
 	 * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in
 	 * mnt_is_readonly() and bail in case we are racing with remount
 	 * read-only.
@@ -614 +606 @@
  * a call to mnt_unhold_writers() in order to stop preventing write access to
  * @mnt.
  *
- * Context: This function expects lock_mount_hash() to be held serializing
- * setting MNT_WRITE_HOLD.
+ * Context: This function expects to be in mount_locked_reader scope serializing
+ * setting WRITE_HOLD.
  * Return: On success 0 is returned.
  *         On error, -EBUSY is returned.
  */
 static inline int mnt_hold_writers(struct mount *mnt)
 {
-	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
+	set_write_hold(mnt);
 	/*
-	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
+	 * After storing WRITE_HOLD, we'll read the counters. This store
 	 * should be visible before we do.
 	 */
 	smp_mb();
@@ -639 +631 @@
 	 * sum up each counter, if we read a counter before it is incremented,
 	 * but then read another CPU's count which it has been subsequently
 	 * decremented from -- we would see more decrements than we should.
-	 * MNT_WRITE_HOLD protects against this scenario, because
+	 * WRITE_HOLD protects against this scenario, because
 	 * mnt_want_write first increments count, then smp_mb, then spins on
-	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
+	 * WRITE_HOLD, so it can't be decremented by another CPU while
 	 * we're counting up here.
 	 */
 	if (mnt_get_writers(mnt) > 0)
@@ -657 +649 @@
 * Stop preventing write access to @mnt allowing callers to gain write access
 * to @mnt again.
 *
- * This function can only be called after a successful call to
- * mnt_hold_writers().
+ * This function can only be called after a call to mnt_hold_writers().
 *
- * Context: This function expects lock_mount_hash() to be held.
+ * Context: This function expects to be in the same mount_locked_reader scope
+ * as the matching mnt_hold_writers().
 */
 static inline void mnt_unhold_writers(struct mount *mnt)
 {
+	if (!test_write_hold(mnt))
+		return;
 	/*
-	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
+	 * MNT_READONLY must become visible before ~WRITE_HOLD, so writers
 	 * that become unheld will see MNT_READONLY.
 	 */
 	smp_wmb();
-	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
+	clear_write_hold(mnt);
+}
+
+static inline void mnt_del_instance(struct mount *m)
+{
+	struct mount **p = m->mnt_pprev_for_sb;
+	struct mount *next = m->mnt_next_for_sb;
+
+	if (next)
+		next->mnt_pprev_for_sb = p;
+	*p = next;
+}
+
+static inline void mnt_add_instance(struct mount *m, struct super_block *s)
+{
+	struct mount *first = s->s_mounts;
+
+	if (first)
+		first->mnt_pprev_for_sb = &m->mnt_next_for_sb;
+	m->mnt_next_for_sb = first;
+	m->mnt_pprev_for_sb = &s->s_mounts;
+	s->s_mounts = m;
 }
 
 static int mnt_make_readonly(struct mount *mnt)
@@ -708 +677 @@
 
 int sb_prepare_remount_readonly(struct super_block *sb)
 {
-	struct mount *mnt;
 	int err = 0;
 
-	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
+	/* Racy optimization.  Recheck the counter under WRITE_HOLD */
 	if (atomic_long_read(&sb->s_remove_count))
 		return -EBUSY;
 
-	lock_mount_hash();
-	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
-		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
-			err = mnt_hold_writers(mnt);
+	guard(mount_locked_reader)();
+
+	for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) {
+		if (!(m->mnt.mnt_flags & MNT_READONLY)) {
+			err = mnt_hold_writers(m);
 			if (err)
 				break;
 		}
@@ -728 +697 @@
 
 	if (!err)
 		sb_start_ro_state_change(sb);
-	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
-		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
-			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
+	for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) {
+		if (test_write_hold(m))
+			clear_write_hold(m);
 	}
-	unlock_mount_hash();
 
 	return err;
 }
@@ -790 +760 @@
 }
 
 /**
- * __lookup_mnt - find first child mount
+ * __lookup_mnt - mount hash lookup
  * @mnt: parent mount
- * @dentry: mountpoint
+ * @dentry: dentry of mountpoint
  *
- * If @mnt has a child mount @c mounted @dentry find and return it.
+ * If @mnt has a child mount @c mounted on @dentry find and return it.
+ * Caller must either hold the spinlock component of @mount_lock or
+ * hold rcu_read_lock(), sample the seqcount component before the call
+ * and recheck it afterwards.
  *
- * Note that the child mount @c need not be unique. There are cases
- * where shadow mounts are created. For example, during mount
- * propagation when a source mount @mnt whose root got overmounted by a
- * mount @o after path lookup but before @namespace_sem could be
- * acquired gets copied and propagated. So @mnt gets copied including
- * @o. When @mnt is propagated to a destination mount @d that already
- * has another mount @n mounted at the same mountpoint then the source
- * mount @mnt will be tucked beneath @n, i.e., @n will be mounted on
- * @mnt and @mnt mounted on @d. Now both @n and @o are mounted at @mnt
- * on @dentry.
- *
- * Return: The first child of @mnt mounted @dentry or NULL.
+ * Return: The child of @mnt mounted on @dentry or %NULL.
  */
 struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 {
@@ -812 +790 @@
 	return NULL;
 }
 
-/*
- * lookup_mnt - Return the first child mount mounted at path
+/**
+ * lookup_mnt - Return the child mount mounted at given location
+ * @path: location in the namespace
  *
- * "First" means first mounted chronologically.  If you create the
- * following mounts:
- *
- * mount /dev/sda1 /mnt
- * mount /dev/sda2 /mnt
- * mount /dev/sda3 /mnt
- *
- * Then lookup_mnt() on the base /mnt dentry in the root mount will
- * return successively the root dentry and vfsmount of /dev/sda1, then
- * /dev/sda2, then /dev/sda3, then NULL.
- *
- * lookup_mnt takes a reference to the found vfsmount.
+ * Acquires and returns a new reference to mount at given location
+ * or %NULL if nothing is mounted there.
  */
 struct vfsmount *lookup_mnt(const struct path *path)
 {
@@ -854 +841 @@
 {
 	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
 	struct mount *mnt, *n;
-	bool is_covered = false;
 
-	down_read(&namespace_sem);
-	rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) {
-		is_covered = (mnt->mnt_mountpoint == dentry);
-		if (is_covered)
-			break;
-	}
-	up_read(&namespace_sem);
+	guard(namespace_shared)();
 
-	return is_covered;
+	rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node)
+		if (mnt->mnt_mountpoint == dentry)
+			return true;
+
+	return false;
 }
 
 struct pinned_mountpoint {
 	struct hlist_node node;
 	struct mountpoint *mp;
+	struct mount *parent;
 };
 
 static bool lookup_mountpoint(struct dentry *dentry, struct pinned_mountpoint *m)
@@ -958 +947 @@
 	}
 }
 
-static inline int check_mnt(struct mount *mnt)
+static inline int check_mnt(const struct mount *mnt)
 {
 	return mnt->mnt_ns == current->nsproxy->mnt_ns;
 }
@@ -1160 +1149 @@
 	touch_mnt_namespace(n);
 }
 
+static void setup_mnt(struct mount *m, struct dentry *root)
+{
+	struct super_block *s = root->d_sb;
+
+	atomic_inc(&s->s_active);
+	m->mnt.mnt_sb = s;
+	m->mnt.mnt_root = dget(root);
+	m->mnt_mountpoint = m->mnt.mnt_root;
+	m->mnt_parent = m;
+
+	guard(mount_locked_reader)();
+	mnt_add_instance(m, s);
+}
+
 /**
  * vfs_create_mount - Create a mount for a configured superblock
  * @fc: The configuration context with the superblock attached
@@ -1197 +1172 @@
 	if (fc->sb_flags & SB_KERNMOUNT)
 		mnt->mnt.mnt_flags = MNT_INTERNAL;
 
-	atomic_inc(&fc->root->d_sb->s_active);
-	mnt->mnt.mnt_sb = fc->root->d_sb;
-	mnt->mnt.mnt_root = dget(fc->root);
-	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-	mnt->mnt_parent = mnt;
+	setup_mnt(mnt, fc->root);
 
-	lock_mount_hash();
-	list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
-	unlock_mount_hash();
 	return &mnt->mnt;
 }
 EXPORT_SYMBOL(vfs_create_mount);
@@ -1256 +1238 @@
 static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 					int flag)
 {
-	struct super_block *sb = old->mnt.mnt_sb;
 	struct mount *mnt;
 	int err;
@@ -1280 +1263 @@
 	if (mnt->mnt_group_id)
 		set_mnt_shared(mnt);
 
-	atomic_inc(&sb->s_active);
 	mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
 
-	mnt->mnt.mnt_sb = sb;
-	mnt->mnt.mnt_root = dget(root);
-	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-	mnt->mnt_parent = mnt;
-	lock_mount_hash();
-	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
-	unlock_mount_hash();
+	setup_mnt(mnt, root);
 
 	if (flag & CL_PRIVATE)	// we are done with it
 		return mnt;
@@ -1388 +1378 @@
 	mnt->mnt.mnt_flags |= MNT_DOOMED;
 	rcu_read_unlock();
 
-	list_del(&mnt->mnt_instance);
+	mnt_del_instance(mnt);
 	if (unlikely(!list_empty(&mnt->mnt_expire)))
 		list_del(&mnt->mnt_expire);
@@ -1729 +1719 @@
 	down_write(&namespace_sem);
 }
 
-DEFINE_GUARD(namespace_lock, struct rw_semaphore *, namespace_lock(), namespace_unlock())
-
 enum umount_tree_flags {
 	UMOUNT_SYNC = 1,
 	UMOUNT_PROPAGATE = 2,
@@ -1793 +1785 @@
 	if (how & UMOUNT_PROPAGATE)
 		propagate_umount(&tmp_list);
 
+	bulk_make_private(&tmp_list);
+
 	while (!list_empty(&tmp_list)) {
 		struct mnt_namespace *ns;
 		bool disconnect;
@@ -1819 +1809 @@
 				umount_mnt(p);
 			}
 		}
-		change_mnt_propagation(p, MS_PRIVATE);
 		if (disconnect)
 			hlist_add_head(&p->mnt_umount, &unmounted);
@@ -1978 +1969 @@
 	struct pinned_mountpoint mp = {};
 	struct mount *mnt;
 
-	namespace_lock();
-	lock_mount_hash();
+	guard(namespace_excl)();
+	guard(mount_writer)();
+
 	if (!lookup_mountpoint(dentry, &mp))
-		goto out_unlock;
+		return;
 
 	event++;
 	while (mp.node.next) {
@@ -1994 +1984 @@
 		else umount_tree(mnt, UMOUNT_CONNECTED);
 	}
 	unpin_mountpoint(&mp);
-out_unlock:
-	unlock_mount_hash();
-	namespace_unlock();
 }
 
 /*
@@ -2032 +2025 @@
 }
 
 // caller is responsible for flags being sane
-int path_umount(struct path *path, int flags)
+int path_umount(const struct path *path, int flags)
 {
 	struct mount *mnt = real_mount(path->mnt);
 	int ret;
@@ -2245 +2238 @@
 	return p;
 }
 
-struct path *collect_paths(const struct path *path,
-			   struct path *prealloc, unsigned count)
+const struct path *collect_paths(const struct path *path,
+				 struct path *prealloc, unsigned count)
 {
 	struct mount *root = real_mount(path->mnt);
@@ -2253 +2246 @@
 	struct path *res = prealloc, *to_free = NULL;
 	unsigned n = 0;
 
-	guard(rwsem_read)(&namespace_sem);
+	guard(namespace_shared)();
 
 	if (!check_mnt(root))
 		return ERR_PTR(-EINVAL);
@@ -2279 +2272 @@
 	return res;
 }
 
-void drop_collected_paths(struct path *paths, struct path *prealloc)
+void drop_collected_paths(const struct path *paths, const struct path *prealloc)
 {
-	for (struct path *p = paths; p->mnt; p++)
+	for (const struct path *p = paths; p->mnt; p++)
 		path_put(p);
 	if (paths != prealloc)
 		kfree(paths);
@@ -2308 +2301 @@
 		return;
 	}
 
-	scoped_guard(namespace_lock, &namespace_sem) {
+	scoped_guard(namespace_excl) {
 		if (!anon_ns_root(m))
 			return;
@@ -2319 +2312 @@
 	}
 }
 
+/* locks: namespace_shared && pinned(mnt) || mount_locked_reader */
 static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
 {
 	struct mount *child;
@@ -2336 +2328 @@
 
 bool has_locked_children(struct mount *mnt, struct dentry *dentry)
 {
-	bool res;
-
-	read_seqlock_excl(&mount_lock);
-	res = __has_locked_children(mnt, dentry);
-	read_sequnlock_excl(&mount_lock);
-	return res;
+	guard(mount_locked_reader)();
+	return __has_locked_children(mnt, dentry);
 }
 
 /*
@@ -2345 +2341 @@
 * specified subtree. Such references can act as pins for mount namespaces
 * that aren't checked by the mount-cycle checking code, thereby allowing
 * cycles to be made.
+ *
+ * locks: mount_locked_reader || namespace_shared && pinned(subtree)
 */
 static bool check_for_nsfs_mounts(struct mount *subtree)
 {
-	struct mount *p;
-	bool ret = false;
-
-	lock_mount_hash();
-	for (p = subtree; p; p = next_mnt(p, subtree))
+	for (struct mount *p = subtree; p; p = next_mnt(p, subtree))
 		if (mnt_ns_loop(p->mnt.mnt_root))
-			goto out;
-
-	ret = true;
-out:
-	unlock_mount_hash();
-	return ret;
+			return false;
+	return true;
 }
 
 /**
@@ -2373 +2375 @@
 	struct mount *old_mnt = real_mount(path->mnt);
 	struct mount *new_mnt;
 
-	guard(rwsem_read)(&namespace_sem);
+	guard(namespace_shared)();
 
 	if (IS_MNT_UNBINDABLE(old_mnt))
 		return ERR_PTR(-EINVAL);
@@ -2494 +2496 @@
 /**
 * attach_recursive_mnt - attach a source mount tree
 * @source_mnt: mount tree to be attached
- * @dest_mnt: mount that @source_mnt will be mounted on
- * @dest_mp: the mountpoint @source_mnt will be mounted at
+ * @dest: the context for mounting at the place where the tree should go
 *
 * NOTE: in the table below explains the semantics when a source mount
 * of a given type is attached to a destination mount of a given type.
@@ -2557 +2560 @@
 *         Otherwise a negative error code is returned.
 */
 static int attach_recursive_mnt(struct mount *source_mnt,
-			struct mount *dest_mnt,
-			struct mountpoint *dest_mp)
+			const struct pinned_mountpoint *dest)
 {
 	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+	struct mount *dest_mnt = dest->parent;
+	struct mountpoint *dest_mp = dest->mp;
 	HLIST_HEAD(tree_list);
 	struct mnt_namespace *ns = dest_mnt->mnt_ns;
 	struct pinned_mountpoint root = {};
@@ -2641 +2643 @@
 					  child->mnt_mountpoint);
 		commit_tree(child);
 		if (q) {
+			struct mount *r = topmost_overmount(child);
 			struct mountpoint *mp = root.mp;
-			struct mount *r = child;
-			while (unlikely(r->overmount))
-				r = r->overmount;
+
 			if (unlikely(shorter) && child != source_mnt)
 				mp = shorter;
 			mnt_change_mountpoint(r, mp, q);
@@ -2672 +2675 @@
 	return err;
 }
 
-/**
- * do_lock_mount - lock mount and mountpoint
- * @path: target path
- * @beneath: whether the intention is to mount beneath @path
- *
- * Follow the mount stack on @path until the top mount @mnt is found. If
- * the initial @path->{mnt,dentry} is a mountpoint lookup the first
- * mount stacked on top of it. Then simply follow @{mnt,mnt->mnt_root}
- * until nothing is stacked on top of it anymore.
- *
- * Acquire the inode_lock() on the top mount's ->mnt_root to protect
- * against concurrent removal of the new mountpoint from another mount
- * namespace.
- *
- * If @beneath is requested, acquire inode_lock() on @mnt's mountpoint
- * @mp on @mnt->mnt_parent must be acquired. This protects against a
- * concurrent unlink of @mp->mnt_dentry from another mount namespace
- * where @mnt doesn't have a child mount mounted @mp. A concurrent
- * removal of @mnt->mnt_root doesn't matter as nothing will be mounted
- * on top of it for @beneath.
- *
- * In addition, @beneath needs to make sure that @mnt hasn't been
- * unmounted or moved from its current mountpoint in between dropping
- * @mount_lock and acquiring @namespace_sem. For the !@beneath case @mnt
- * being unmounted would be detected later by e.g., calling
- * check_mnt(mnt) in the function it's called from. For the @beneath
- * case however, it's useful to detect it directly in do_lock_mount().
- * If @mnt hasn't been unmounted then @mnt->mnt_mountpoint still points
- * to @mnt->mnt_mp->m_dentry. But if @mnt has been unmounted it will
- * point to @mnt->mnt_root and @mnt->mnt_mp will be NULL.
- *
- * Return: Either the target mountpoint on the top mount or the top
- * mount's mountpoint.
- */
-static int do_lock_mount(struct path *path, struct pinned_mountpoint *pinned, bool beneath)
+static inline struct mount *where_to_mount(const struct path *path,
+					   struct dentry **dentry,
+					   bool beneath)
 {
-	struct vfsmount *mnt = path->mnt;
-	struct dentry *dentry;
-	struct path under = {};
-	int err = -ENOENT;
+	struct mount *m;
 
-	for (;;) {
-		struct mount *m = real_mount(mnt);
+	if (unlikely(beneath)) {
+		m = topmost_overmount(real_mount(path->mnt));
+		*dentry = m->mnt_mountpoint;
+		return m->mnt_parent;
+	}
+	m = __lookup_mnt(path->mnt, path->dentry);
+	if (unlikely(m)) {
+		m = topmost_overmount(m);
+		*dentry = m->mnt.mnt_root;
+		return m;
+	}
+	*dentry = path->dentry;
+	return real_mount(path->mnt);
+}
 
-		if (beneath) {
-			path_put(&under);
-			read_seqlock_excl(&mount_lock);
-			under.mnt = mntget(&m->mnt_parent->mnt);
-			under.dentry = dget(m->mnt_mountpoint);
-			read_sequnlock_excl(&mount_lock);
-			dentry = under.dentry;
-		} else {
-			dentry = path->dentry;
+/**
+ * do_lock_mount - acquire environment for mounting
+ * @path: target path
+ * @res: context to set up
+ * @beneath: whether the intention is to mount beneath @path
+ *
+ * To mount something at given location, we need
+ *	namespace_sem locked exclusive
+ *	inode of dentry we are mounting on locked exclusive
+ *	struct mountpoint for that dentry
+ *	struct mount we are mounting on
+ *
+ * Results are stored in caller-supplied context (pinned_mountpoint);
+ * on success we have res->parent and res->mp pointing to parent and
+ * mountpoint respectively and res->node inserted into the ->m_list
+ * of the mountpoint, making sure the mountpoint won't disappear.
+ * On failure we have res->parent set to ERR_PTR(-E...), res->mp
+ * left NULL, res->node - empty.
+ * In case of success do_lock_mount returns with locks acquired (in
+ * proper order - inode lock nests outside of namespace_sem).
+ *
+ * Request to mount on overmounted location is treated as "mount on
+ * top of whatever's overmounting it"; request to mount beneath
+ * a location - "mount immediately beneath the topmost mount at that
+ * place".
+ *
+ * In all cases the location must not have been unmounted and the
+ * chosen mountpoint must be allowed to be mounted on.  For "beneath"
+ * case we also require the location to be at the root of a mount
+ * that has a parent (i.e. is not a root of some namespace).
+ */
+static void do_lock_mount(const struct path *path,
+			  struct pinned_mountpoint *res,
+			  bool beneath)
+{
+	int err;
+
+	if (unlikely(beneath) && !path_mounted(path)) {
+		res->parent = ERR_PTR(-EINVAL);
+		return;
+	}
+
+	do {
+		struct dentry *dentry, *d;
+		struct mount *m, *n;
+
+		scoped_guard(mount_locked_reader) {
+			m = where_to_mount(path, &dentry, beneath);
+			if (&m->mnt != path->mnt) {
+				mntget(&m->mnt);
+				dget(dentry);
+			}
 		}
 
 		inode_lock(dentry->d_inode);
 		namespace_lock();
 
-		if (unlikely(cant_mount(dentry) || !is_mounted(mnt)))
-			break;	// not to be mounted on
+		// check if the chain of mounts (if any) has changed.
+		scoped_guard(mount_locked_reader)
+			n = where_to_mount(path, &d, beneath);
 
-		if (beneath && unlikely(m->mnt_mountpoint != dentry ||
-					&m->mnt_parent->mnt != under.mnt)) {
+		if (unlikely(n != m || dentry != d))
+			err = -EAGAIN;	// something moved, retry
+		else if (unlikely(cant_mount(dentry) || !is_mounted(path->mnt)))
+			err = -ENOENT;	// not to be mounted on
+		else if (beneath && &m->mnt == path->mnt && !m->overmount)
+			err = -EINVAL;
+		else
+			err = get_mountpoint(dentry, res);
+
+		if (unlikely(err)) {
+			res->parent = ERR_PTR(err);
 			namespace_unlock();
 			inode_unlock(dentry->d_inode);
-			continue;	// got moved
+		} else {
+			res->parent = m;
 		}
-
-		mnt = lookup_mnt(path);
-		if (unlikely(mnt)) {
-			namespace_unlock();
-			inode_unlock(dentry->d_inode);
-			path_put(path);
-			path->mnt = mnt;
-			path->dentry = dget(mnt->mnt_root);
-			continue;	// got overmounted
+		/*
+		 * Drop the temporary references.  This is subtle - on success
+		 * we are doing that under namespace_sem, which would normally
+		 * be forbidden.  However, in that case we are guaranteed that
+		 * refcounts won't reach zero, since we know that path->mnt
+		 * is mounted and thus all mounts reachable from it are pinned
+		 * and stable, along with their mountpoints and roots.
+		 */
+		if (&m->mnt != path->mnt) {
+			dput(dentry);
+			mntput(&m->mnt);
 		}
-		err = get_mountpoint(dentry, pinned);
-		if (err)
-			break;
-		if (beneath) {
-			/*
-			 * @under duplicates the references that will stay
-			 * at least until namespace_unlock(), so the path_put()
-			 * below is safe (and OK to do under namespace_lock -
-			 * we are not dropping the final references here).
-			 */
-			path_put(&under);
-		}
-		return 0;
-	}
-	namespace_unlock();
-	inode_unlock(dentry->d_inode);
-	if (beneath)
-		path_put(&under);
-	return err;
+	} while (err == -EAGAIN);
 }
 
-static inline int lock_mount(struct path *path, struct pinned_mountpoint *m)
-{
-	return do_lock_mount(path, m, false);
-}
-
-static void unlock_mount(struct pinned_mountpoint *m)
+static void __unlock_mount(struct pinned_mountpoint *m)
 {
 	inode_unlock(m->mp->m_dentry->d_inode);
 	read_seqlock_excl(&mount_lock);
@@ -2794 +2787 @@
 	namespace_unlock();
 }
 
-static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+static inline void unlock_mount(struct pinned_mountpoint *m)
+{
+	if (!IS_ERR(m->parent))
+		__unlock_mount(m);
+}
+
+#define LOCK_MOUNT_MAYBE_BENEATH(mp, path, beneath) \
+	struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \
+	do_lock_mount((path), &mp, (beneath))
+#define LOCK_MOUNT(mp, path) LOCK_MOUNT_MAYBE_BENEATH(mp, (path), false)
+#define LOCK_MOUNT_EXACT(mp, path) \
+	struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \
+	lock_mount_exact((path), &mp)
+
+static int graft_tree(struct mount *mnt, const struct pinned_mountpoint *mp)
 {
 	if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
 		return -EINVAL;
 
-	if (d_is_dir(mp->m_dentry) !=
+	if (d_is_dir(mp->mp->m_dentry) !=
 	      d_is_dir(mnt->mnt.mnt_root))
 		return -ENOTDIR;
 
-	return attach_recursive_mnt(mnt, p, mp);
+	return attach_recursive_mnt(mnt, mp);
 }
 
 static int may_change_propagation(const struct mount *m)
@@ -2853 +2832 @@
 /*
 * recursively change the type of the mountpoint.
 */
-static int do_change_type(struct path *path, int ms_flags)
+static int do_change_type(const struct path *path, int ms_flags)
 {
 	struct mount *m;
 	struct mount *mnt = real_mount(path->mnt);
 	int recurse = ms_flags & MS_REC;
 	int type;
-	int err = 0;
+	int err;
 
 	if (!path_mounted(path))
 		return -EINVAL;
@@ -2868 +2847 @@
 	if (!type)
 		return -EINVAL;
 
-	namespace_lock();
+	guard(namespace_excl)();
+
 	err = may_change_propagation(mnt);
 	if (err)
-		goto out_unlock;
+		return err;
 
 	if (type == MS_SHARED) {
 		err = invent_group_ids(mnt, recurse);
 		if (err)
-			goto out_unlock;
+			return err;
 	}
 
 	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
 		change_mnt_propagation(m, type);
 
-out_unlock:
-	namespace_unlock();
-	return err;
+	return 0;
 }
 
 /* may_copy_tree() - check if a mount tree can be copied
@@ -2929 +2909 @@
 *
 * Returns true if the mount tree can be copied, false otherwise.
 */
-static inline bool may_copy_tree(struct path *path)
+static inline bool may_copy_tree(const struct path *path)
 {
 	struct mount *mnt = real_mount(path->mnt);
 	const struct dentry_operations *d_op;
@@ -2951 +2931 @@
 }
 
 
-static struct mount *__do_loopback(struct path *old_path, int recurse)
+static struct mount *__do_loopback(const struct path *old_path, int recurse)
 {
 	struct mount *old = real_mount(old_path->mnt);
@@ -2973 +2953 @@
 /*
 * do loopback mount.
 */
-static int do_loopback(struct path *path, const char *old_name,
-				int recurse)
+static int do_loopback(const struct path *path, const char *old_name,
+		       int recurse)
 {
-	struct path old_path;
-	struct mount *mnt = NULL, *parent;
-	struct pinned_mountpoint mp = {};
+	struct path old_path __free(path_put) = {};
+	struct mount *mnt = NULL;
 	int err;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -2985 +2966 @@
 	if (err)
 		return err;
 
-	err = -EINVAL;
 	if (mnt_ns_loop(old_path.dentry))
-		goto out;
+		return -EINVAL;
 
-	err = lock_mount(path, &mp);
-	if (err)
-		goto out;
+	LOCK_MOUNT(mp, path);
+	if (IS_ERR(mp.parent))
+		return PTR_ERR(mp.parent);
 
-	parent = real_mount(path->mnt);
-	if (!check_mnt(parent))
-		goto out2;
+	if (!check_mnt(mp.parent))
+		return -EINVAL;
 
 	mnt = __do_loopback(&old_path, recurse);
-	if (IS_ERR(mnt)) {
-		err = PTR_ERR(mnt);
-		goto out2;
-	}
+	if (IS_ERR(mnt))
+		return PTR_ERR(mnt);
 
-	err = graft_tree(mnt, parent, mp.mp);
+	err = graft_tree(mnt, &mp);
 	if (err) {
 		lock_mount_hash();
 		umount_tree(mnt, UMOUNT_SYNC);
 		unlock_mount_hash();
 	}
-out2:
-	unlock_mount(&mp);
-out:
-	path_put(&old_path);
 	return err;
 }
 
-static struct file *open_detached_copy(struct path *path, bool recursive)
+static struct mnt_namespace *get_detached_copy(const struct path *path, bool recursive)
 {
 	struct mnt_namespace *ns, *mnt_ns = current->nsproxy->mnt_ns, *src_mnt_ns;
 	struct user_namespace *user_ns = mnt_ns->user_ns;
 	struct mount *mnt, *p;
-	struct file *file;
 
 	ns = alloc_mnt_ns(user_ns, true);
 	if (IS_ERR(ns))
-		return ERR_CAST(ns);
+		return ns;
 
-	namespace_lock();
+	guard(namespace_excl)();
 
 	/*
 	 * Record the sequence number of the source mount namespace.
@@ -3035 +3025 @@
 
 	mnt = __do_loopback(path, recursive);
 	if (IS_ERR(mnt)) {
-		namespace_unlock();
-		free_mnt_ns(ns);
+		emptied_ns = ns;
 		return ERR_CAST(mnt);
 	}
 
-	lock_mount_hash();
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
 		mnt_add_to_ns(ns, p);
 		ns->nr_mounts++;
 	}
 	ns->root = mnt;
-	mntget(&mnt->mnt);
-	unlock_mount_hash();
-	namespace_unlock();
+	return ns;
+}
+
+static struct file *open_detached_copy(struct path *path, bool recursive)
+{
+	struct mnt_namespace *ns = get_detached_copy(path, recursive);
+	struct file *file;
+
+	if (IS_ERR(ns))
+		return ERR_CAST(ns);
 
 	mntput(path->mnt);
-	path->mnt = &mnt->mnt;
+	path->mnt = mntget(&ns->root->mnt);
 	file = dentry_open(path, O_PATH, current_cred());
 	if (IS_ERR(file))
 		dissolve_on_fput(path->mnt);
@@ -3173 +3158 @@
 	touch_mnt_namespace(mnt->mnt_ns);
 }
 
-static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
+static void mnt_warn_timestamp_expiry(const struct path *mountpoint,
+				      struct vfsmount *mnt)
 {
 	struct super_block *sb = mnt->mnt_sb;
@@ -3208 +3192 @@
 * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND
 * to mount(2).
 */
-static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
+static int do_reconfigure_mnt(const struct path *path, unsigned int mnt_flags)
 {
 	struct super_block *sb = path->mnt->mnt_sb;
 	struct mount *mnt = real_mount(path->mnt);
@@ -3245 +3229 @@
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */
-static int do_remount(struct path *path, int sb_flags,
+static int do_remount(const struct path *path, int sb_flags,
 		      int mnt_flags, void *data)
 {
 	int err;
@@ -3303 +3287 @@
 	return 0;
 }
 
-static int do_set_group(struct path *from_path, struct path *to_path)
+static int do_set_group(const struct path *from_path, const struct path *to_path)
 {
-	struct mount *from, *to;
+	struct mount *from = real_mount(from_path->mnt);
+	struct mount *to = real_mount(to_path->mnt);
 	int err;
 
-	from = real_mount(from_path->mnt);
-	to = real_mount(to_path->mnt);
-
-	namespace_lock();
+	guard(namespace_excl)();
 
 	err = may_change_propagation(from);
 	if (err)
-		goto out;
+		return err;
 	err = may_change_propagation(to);
 	if (err)
-		goto out;
+		return err;
 
-	err = -EINVAL;
 	/* To and From paths should be mount roots */
 	if (!path_mounted(from_path))
-		goto out;
+		return -EINVAL;
 	if (!path_mounted(to_path))
-		goto out;
+		return -EINVAL;
 
 	/* Setting sharing groups is only allowed across same superblock */
 	if (from->mnt.mnt_sb != to->mnt.mnt_sb)
-		goto out;
+		return -EINVAL;
 
 	/* From mount root should be wider than To mount root */
 	if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
-		goto out;
+		return -EINVAL;
 
 	/* From mount should not have locked children in place of To's root */
 	if (__has_locked_children(from, to->mnt.mnt_root))
-		goto out;
+		return -EINVAL;
 
 	/* Setting sharing groups is only allowed on private mounts */
 	if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
-		goto out;
+		return -EINVAL;
 
 	/* From should not be private */
 	if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
-		goto out;
+		return -EINVAL;
 
 	if (IS_MNT_SLAVE(from)) {
 		hlist_add_behind(&to->mnt_slave, &from->mnt_slave);
@@ -3354 +3341 @@
 		list_add(&to->mnt_share, &from->mnt_share);
 		set_mnt_shared(to);
 	}
-
-	err = 0;
-out:
-	namespace_unlock();
-	return err;
+	return 0;
 }
 
 /**
@@ -3398 +3389 @@
 
 /**
 * can_move_mount_beneath - check that we can mount beneath the top mount
- * @from: mount to mount beneath
- * @to: mount under which to mount
- * @mp: mountpoint of @to
+ * @mnt_from: mount we are trying to move
+ * @mnt_to: mount under which to mount
+ * @mp: mountpoint of @mnt_to
 *
- * - Make sure that @to->dentry is actually the root of a mount under
- *   which we can mount another mount.
 * - Make sure that nothing can be mounted beneath the caller's current
 *   root or the rootfs of the namespace.
 * - Make sure that the caller can unmount the topmost mount ensuring
 *   that the caller could reveal the underlying mountpoint.
- * - Ensure that nothing has been mounted on top of @from before we
+ * - Ensure that nothing has been mounted on top of @mnt_from before we
 *   grabbed @namespace_sem to avoid creating pointless shadow mounts.
 * - Prevent mounting beneath a mount if the propagation relationship
 *   between the source mount, parent mount, and top mount would lead to
@@ -3415 +3408 @@
 * Context: This function expects namespace_lock() to be held.
 * Return: On success 0, and on error a negative error code is returned.
 */
-static int can_move_mount_beneath(const struct path *from,
-				  const struct path *to,
+static int can_move_mount_beneath(const struct mount *mnt_from,
+				  const struct mount *mnt_to,
 				  const struct mountpoint *mp)
 {
-	struct mount *mnt_from = real_mount(from->mnt),
-		     *mnt_to = real_mount(to->mnt),
-		     *parent_mnt_to = mnt_to->mnt_parent;
-
-	if (!mnt_has_parent(mnt_to))
-		return -EINVAL;
-
-	if (!path_mounted(to))
-		return -EINVAL;
+	struct mount *parent_mnt_to = mnt_to->mnt_parent;
 
 	if (IS_MNT_LOCKED(mnt_to))
 		return -EINVAL;
 
 	/* Avoid creating shadow mounts during mount propagation. */
-	if (path_overmounted(from))
+	if (mnt_from->overmount)
 		return -EINVAL;
 
 	/*
@@ -3516 +3517 @@
 	return check_anonymous_mnt(mnt);
 }
 
-static int do_move_mount(struct path *old_path,
-			 struct path *new_path, enum mnt_tree_flags_t flags)
+static int do_move_mount(const struct path *old_path,
+			 const struct path *new_path,
+			 enum mnt_tree_flags_t flags)
 {
-	struct mnt_namespace *ns;
-	struct mount *p;
-	struct mount *old;
-	struct mount *parent;
-	struct pinned_mountpoint mp;
+	struct mount *old = real_mount(old_path->mnt);
 	int err;
 	bool beneath = flags & MNT_TREE_BENEATH;
 
-	err = do_lock_mount(new_path, &mp, beneath);
-	if (err)
-		return err;
+	if (!path_mounted(old_path))
+		return -EINVAL;
 
-	old = real_mount(old_path->mnt);
-	p = real_mount(new_path->mnt);
-	parent = old->mnt_parent;
-	ns = old->mnt_ns;
+	if (d_is_dir(new_path->dentry) != d_is_dir(old_path->dentry))
+		return -EINVAL;
 
-	err = -EINVAL;
+	LOCK_MOUNT_MAYBE_BENEATH(mp, new_path, beneath);
+	if (IS_ERR(mp.parent))
+		return PTR_ERR(mp.parent);
 
 	if (check_mnt(old)) {
 		/* if the source is in our namespace... */
 		/* ... it should be detachable from parent */
 		if (!mnt_has_parent(old) || IS_MNT_LOCKED(old))
-			goto out;
+			return -EINVAL;
+		/* ... which should not be shared */
+		if (IS_MNT_SHARED(old->mnt_parent))
+			return -EINVAL;
 		/* ... and the target should be in our namespace */
-		if (!check_mnt(p))
-			goto out;
-		/* parent of the source should not be shared */
-		if (IS_MNT_SHARED(parent))
-			goto out;
+		if (!check_mnt(mp.parent))
+			return -EINVAL;
 	} else {
 		/*
 		 * otherwise the source must be the root of some anon namespace.
 		 */
 		if (!anon_ns_root(old))
-			goto out;
+			return -EINVAL;
 		/*
 		 * Bail out early if the target is within the same namespace -
 		 * subsequent checks would've rejected that, but they lose
 		 * some corner cases if we check it early.
 		 */
-		if (ns == p->mnt_ns)
-			goto out;
+		if (old->mnt_ns == mp.parent->mnt_ns)
+			return -EINVAL;
 		/*
 		 * Target should be either in our namespace or in an acceptable
 		 * anon namespace, sensu check_anonymous_mnt().
 		 */
-		if (!may_use_mount(p))
-			goto out;
+		if (!may_use_mount(mp.parent))
+			return -EINVAL;
 	}
 
-	if (!path_mounted(old_path))
-		goto out;
-
-	if (d_is_dir(new_path->dentry) !=
-	    d_is_dir(old_path->dentry))
-		goto out;
-
 	if (beneath) {
-		err = can_move_mount_beneath(old_path, new_path, mp.mp);
-		if (err)
-			goto out;
+		struct mount *over = real_mount(new_path->mnt);
 
-		err = -EINVAL;
-		p = p->mnt_parent;
+		if (mp.parent != over->mnt_parent)
+			over = mp.parent->overmount;
+		err = can_move_mount_beneath(old, over, mp.mp);
+		if (err)
+			return err;
 	}
 
 	/*
 	 * Don't move a mount tree containing unbindable mounts to a destination
 	 * mount which is shared.
 	 */
-	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
-		goto out;
-	err = -ELOOP;
+	if (IS_MNT_SHARED(mp.parent) && tree_contains_unbindable(old))
+		return -EINVAL;
 	if (!check_for_nsfs_mounts(old))
-		goto out;
-	if (mount_is_ancestor(old, p))
-		goto out;
+		return -ELOOP;
+	if (mount_is_ancestor(old, mp.parent))
+		return -ELOOP;
 
-	err = attach_recursive_mnt(old, p, mp.mp);
-out:
-	unlock_mount(&mp);
-	return err;
+	return attach_recursive_mnt(old, &mp);
 }
 
-static int do_move_mount_old(struct path *path, const char *old_name)
+static int do_move_mount_old(const struct path *path, const char *old_name)
 {
-	struct path old_path;
+	struct path old_path __free(path_put) = {};
 	int err;
 
 	if (!old_name || !*old_name)
@@ -3602 +3617 @@
 	if (err)
 		return err;
 
-	err = do_move_mount(&old_path, path, 0);
-	path_put(&old_path);
-	return err;
+	return do_move_mount(&old_path, path, 0);
 }
 
 /*
 * add a mount into a namespace's mount tree
 */
-static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
-			const struct path *path, int mnt_flags)
+static int do_add_mount(struct mount *newmnt, const struct pinned_mountpoint *mp,
+			int mnt_flags)
 {
-	struct mount *parent = real_mount(path->mnt);
+	struct mount *parent = mp->parent;
+
+	if (IS_ERR(parent))
+		return PTR_ERR(parent);
 
 	mnt_flags &= ~MNT_INTERNAL_FLAGS;
@@ -3628 +3642 @@
 	}
 
 	/* Refuse the same filesystem on the same mount point */
-	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && path_mounted(path))
+	if (parent->mnt.mnt_sb == newmnt->mnt.mnt_sb &&
+	    parent->mnt.mnt_root == mp->mp->m_dentry)
 		return -EBUSY;
 
 	if (d_is_symlink(newmnt->mnt.mnt_root))
 		return -EINVAL;
 
 	newmnt->mnt.mnt_flags = mnt_flags;
-	return graft_tree(newmnt, parent, mp);
+	return graft_tree(newmnt, mp);
 }
 
 static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
@@ -3645 +3658 @@
 * Create a new mount using a superblock configuration and request it
 * be added to the namespace tree.
 */
-static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
+static int do_new_mount_fc(struct fs_context *fc, const struct path *mountpoint,
 			   unsigned int mnt_flags)
 {
-	struct vfsmount *mnt;
-	struct pinned_mountpoint mp = {};
-	struct super_block *sb = fc->root->d_sb;
+	struct super_block *sb;
+	struct vfsmount *mnt __free(mntput) = fc_mount(fc);
 	int error;
 
-	error = security_sb_kern_mount(sb);
-	if (!error && mount_too_revealing(sb, &mnt_flags)) {
-		errorfcp(fc, "VFS", "Mount too revealing");
-		error = -EPERM;
-	}
-
-	if (unlikely(error)) {
-		fc_drop_locked(fc);
-		return error;
-	}
-
-	up_write(&sb->s_umount);
-
-	mnt = vfs_create_mount(fc);
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
+	sb = fc->root->d_sb;
+	error = security_sb_kern_mount(sb);
+	if (unlikely(error))
+		return error;
+
+	if (unlikely(mount_too_revealing(sb, &mnt_flags))) {
+		errorfcp(fc, "VFS", "Mount too revealing");
+		return -EPERM;
+	}
+
 	mnt_warn_timestamp_expiry(mountpoint, mnt);
 
-	error = lock_mount(mountpoint, &mp);
-	if (!error) {
-		error = do_add_mount(real_mount(mnt), mp.mp,
-				     mountpoint, mnt_flags);
-		unlock_mount(&mp);
-	}
-	if (error < 0)
-		mntput(mnt);
+	LOCK_MOUNT(mp, mountpoint);
+	error = do_add_mount(real_mount(mnt), &mp, mnt_flags);
+	if (!error)
+		retain_and_null_ptr(mnt);	// consumed on success
 	return error;
 }
@@ -3678 +3700 @@
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
-static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
-			int mnt_flags, const char *name, void *data)
+static int do_new_mount(const struct path *path, const char *fstype,
+			int sb_flags, int mnt_flags,
+			const char *name, void *data)
 {
 	struct file_system_type *type;
 	struct fs_context *fc;
@@ -3726 +3747 @@
 	if (!err && !mount_capable(fc))
 		err = -EPERM;
 	if (!err)
-		err = vfs_get_tree(fc);
-	if (!err)
 		err = do_new_mount_fc(fc, path, mnt_flags);
 
 	put_fs_context(fc);
 	return err;
 }
 
-int finish_automount(struct vfsmount *m, const struct path *path)
+static void lock_mount_exact(const struct path *path,
+			     struct pinned_mountpoint *mp)
 {
 	struct dentry *dentry = path->dentry;
-	struct pinned_mountpoint mp = {};
+	int err;
+
+	inode_lock(dentry->d_inode);
+	namespace_lock();
+	if (unlikely(cant_mount(dentry)))
+		err = -ENOENT;
+	else if (path_overmounted(path))
+		err = -EBUSY;
+	else
+		err = get_mountpoint(dentry, mp);
+	if (unlikely(err)) {
+		namespace_unlock();
+		inode_unlock(dentry->d_inode);
+		mp->parent = ERR_PTR(err);
+	} else {
+		mp->parent = real_mount(path->mnt);
+	}
+}
+
+int finish_automount(struct vfsmount *__m, const struct path *path)
+{
+	struct vfsmount *m __free(mntput) = __m;
 	struct mount *mnt;
 	int err;
@@ -3768 +3769 @@
 
 	mnt = real_mount(m);
 
-	if (m->mnt_sb == path->mnt->mnt_sb &&
-	    m->mnt_root == dentry) {
-		err
= -ELOOP;
3774 - goto discard;
3775 - }
3772 + if (m->mnt_root == path->dentry)
3773 + return -ELOOP;
3776 3774
3777 3775 /*
3778 - * we don't want to use lock_mount() - in this case finding something
3776 + * we don't want to use LOCK_MOUNT() - in this case finding something
3779 3777 * that overmounts our mountpoint means "quietly drop what we've
3780 3778 * got", not "try to mount it on top".
3781 3779 */
3782 - inode_lock(dentry->d_inode);
3783 - namespace_lock();
3784 - if (unlikely(cant_mount(dentry))) {
3785 - err = -ENOENT;
3786 - goto discard_locked;
3787 - }
3788 - if (path_overmounted(path)) {
3789 - err = 0;
3790 - goto discard_locked;
3791 - }
3792 - err = get_mountpoint(dentry, &mp);
3793 - if (err)
3794 - goto discard_locked;
3780 + LOCK_MOUNT_EXACT(mp, path);
3781 + if (mp.parent == ERR_PTR(-EBUSY))
3782 + return 0;
3795 3783
3796 - err = do_add_mount(mnt, mp.mp, path,
3797 - path->mnt->mnt_flags | MNT_SHRINKABLE);
3798 - unlock_mount(&mp);
3799 - if (unlikely(err))
3800 - goto discard;
3801 - return 0;
3802 -
3803 - discard_locked:
3804 - namespace_unlock();
3805 - inode_unlock(dentry->d_inode);
3806 - discard:
3807 - mntput(m);
3784 + err = do_add_mount(mnt, &mp, path->mnt->mnt_flags | MNT_SHRINKABLE);
3785 + if (likely(!err))
3786 + retain_and_null_ptr(m);
3808 3787 return err;
3809 3788 }
3810 3789
···
3793 3816 */
3794 3817 void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
3795 3818 {
3796 - read_seqlock_excl(&mount_lock);
3819 + guard(mount_locked_reader)();
3797 3820 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
3798 - read_sequnlock_excl(&mount_lock);
3799 3821 }
3800 3822 EXPORT_SYMBOL(mnt_set_expiry);
3801 3823
···
3811 3835 if (list_empty(mounts))
3812 3836 return;
3813 3837
3814 - namespace_lock();
3815 - lock_mount_hash();
3838 + guard(namespace_excl)();
3839 + guard(mount_writer)();
3816 3840
3817 3841 /* extract from the expiration list every vfsmount that matches the
3818 3842 * following criteria:
···
3834 3858 touch_mnt_namespace(mnt->mnt_ns);
3835 3859 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3836 3860 }
3837 - unlock_mount_hash();
3838 - namespace_unlock();
3839 3861 }
3840 3862
3841 3863 EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
···
3961 3987 * Therefore, if this magic number is present, it carries no information
3962 3988 * and must be discarded.
3963 3989 */ 3964 - int path_mount(const char *dev_name, struct path *path, 3990 + int path_mount(const char *dev_name, const struct path *path, 3965 3991 const char *type_page, unsigned long flags, void *data_page) 3966 3992 { 3967 3993 unsigned int mnt_flags = 0, sb_flags; ··· 4043 4069 int do_mount(const char *dev_name, const char __user *dir_name, 4044 4070 const char *type_page, unsigned long flags, void *data_page) 4045 4071 { 4046 - struct path path; 4072 + struct path path __free(path_put) = {}; 4047 4073 int ret; 4048 4074 4049 4075 ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path); 4050 4076 if (ret) 4051 4077 return ret; 4052 - ret = path_mount(dev_name, &path, type_page, flags, data_page); 4053 - path_put(&path); 4054 - return ret; 4078 + return path_mount(dev_name, &path, type_page, flags, data_page); 4055 4079 } 4056 4080 4057 4081 static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) ··· 4110 4138 struct user_namespace *user_ns, struct fs_struct *new_fs) 4111 4139 { 4112 4140 struct mnt_namespace *new_ns; 4113 - struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 4141 + struct vfsmount *rootmnt __free(mntput) = NULL; 4142 + struct vfsmount *pwdmnt __free(mntput) = NULL; 4114 4143 struct mount *p, *q; 4115 4144 struct mount *old; 4116 4145 struct mount *new; ··· 4130 4157 if (IS_ERR(new_ns)) 4131 4158 return new_ns; 4132 4159 4133 - namespace_lock(); 4160 + guard(namespace_excl)(); 4134 4161 /* First pass: copy the tree topology */ 4135 4162 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; 4136 4163 if (user_ns != ns->user_ns) 4137 4164 copy_flags |= CL_SLAVE; 4138 4165 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 4139 4166 if (IS_ERR(new)) { 4140 - namespace_unlock(); 4141 - ns_common_free(ns); 4142 - dec_mnt_namespaces(new_ns->ucounts); 4143 - mnt_ns_release(new_ns); 4167 + emptied_ns = new_ns; 4144 4168 return ERR_CAST(new); 4145 4169 } 4146 4170 if (user_ns != ns->user_ns) { 4147 - lock_mount_hash(); 4171 + guard(mount_writer)(); 4148 4172 lock_mnt_tree(new); 4149 - unlock_mount_hash(); 4150 4173 } 4151 4174 new_ns->root = new; 4152 4175 ··· 4174 4205 while (p->mnt.mnt_root != q->mnt.mnt_root) 4175 4206 p = next_mnt(skip_mnt_tree(p), old); 4176 4207 } 4177 - namespace_unlock(); 4178 - 4179 - if (rootmnt) 4180 - mntput(rootmnt); 4181 - if (pwdmnt) 4182 - mntput(pwdmnt); 4183 - 4184 4208 ns_tree_add_raw(new_ns); 4185 4209 return new_ns; 4186 4210 } ··· 4398 4436 return ret; 4399 4437 } 4400 4438 4401 - static inline int vfs_move_mount(struct path *from_path, struct path *to_path, 4439 + static inline int vfs_move_mount(const struct path *from_path, 4440 + const struct path *to_path, 4402 4441 enum mnt_tree_flags_t mflags) 4403 4442 { 4404 4443 int ret; ··· 4505 4542 /* 4506 4543 * Return true if path is reachable from root 4507 4544 * 4508 - * namespace_sem or mount_lock is held 4545 + * locks: mount_locked_reader || namespace_shared && is_mounted(mnt) 4509 4546 */ 4510 4547 bool is_path_reachable(struct mount *mnt, struct dentry *dentry, 4511 4548 const struct path *root) ··· 4519 4556 4520 4557 bool path_is_under(const struct path *path1, const struct path *path2) 4521 4558 { 4522 - bool res; 4523 - read_seqlock_excl(&mount_lock); 4524 - res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); 4525 - read_sequnlock_excl(&mount_lock); 4526 - return res; 4559 + guard(mount_locked_reader)(); 4560 + return is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); 4527 4561 } 4528 4562 EXPORT_SYMBOL(path_is_under); 4529 4563 ··· 
4552 4592 SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, 4553 4593 const char __user *, put_old) 4554 4594 { 4555 - struct path new, old, root; 4595 + struct path new __free(path_put) = {}; 4596 + struct path old __free(path_put) = {}; 4597 + struct path root __free(path_put) = {}; 4556 4598 struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent; 4557 - struct pinned_mountpoint old_mp = {}; 4558 4599 int error; 4559 4600 4560 4601 if (!may_mount()) ··· 4564 4603 error = user_path_at(AT_FDCWD, new_root, 4565 4604 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new); 4566 4605 if (error) 4567 - goto out0; 4606 + return error; 4568 4607 4569 4608 error = user_path_at(AT_FDCWD, put_old, 4570 4609 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old); 4571 4610 if (error) 4572 - goto out1; 4611 + return error; 4573 4612 4574 4613 error = security_sb_pivotroot(&old, &new); 4575 4614 if (error) 4576 - goto out2; 4615 + return error; 4577 4616 4578 4617 get_fs_root(current->fs, &root); 4579 - error = lock_mount(&old, &old_mp); 4580 - if (error) 4581 - goto out3; 4582 4618 4583 - error = -EINVAL; 4619 + LOCK_MOUNT(old_mp, &old); 4620 + old_mnt = old_mp.parent; 4621 + if (IS_ERR(old_mnt)) 4622 + return PTR_ERR(old_mnt); 4623 + 4584 4624 new_mnt = real_mount(new.mnt); 4585 4625 root_mnt = real_mount(root.mnt); 4586 - old_mnt = real_mount(old.mnt); 4587 4626 ex_parent = new_mnt->mnt_parent; 4588 4627 root_parent = root_mnt->mnt_parent; 4589 4628 if (IS_MNT_SHARED(old_mnt) || 4590 4629 IS_MNT_SHARED(ex_parent) || 4591 4630 IS_MNT_SHARED(root_parent)) 4592 - goto out4; 4631 + return -EINVAL; 4593 4632 if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) 4594 - goto out4; 4633 + return -EINVAL; 4595 4634 if (new_mnt->mnt.mnt_flags & MNT_LOCKED) 4596 - goto out4; 4597 - error = -ENOENT; 4635 + return -EINVAL; 4598 4636 if (d_unlinked(new.dentry)) 4599 - goto out4; 4600 - error = -EBUSY; 4637 + return -ENOENT; 4601 4638 if (new_mnt == root_mnt || old_mnt == root_mnt) 4602 - goto out4; /* loop, on the same file system */ 4603 - error = -EINVAL; 4639 + return -EBUSY; /* loop, on the same file system */ 4604 4640 if (!path_mounted(&root)) 4605 - goto out4; /* not a mountpoint */ 4641 + return -EINVAL; /* not a mountpoint */ 4606 4642 if (!mnt_has_parent(root_mnt)) 4607 - goto out4; /* absolute root */ 4643 + return -EINVAL; /* absolute root */ 4608 4644 if (!path_mounted(&new)) 4609 - goto out4; /* not a mountpoint */ 4645 + return -EINVAL; /* not a mountpoint */ 4610 4646 if (!mnt_has_parent(new_mnt)) 4611 - goto out4; /* absolute root */ 4647 + return -EINVAL; /* absolute root */ 4612 4648 /* make sure we can reach put_old from new_root */ 4613 - if (!is_path_reachable(old_mnt, old.dentry, &new)) 4614 - goto out4; 4649 + if (!is_path_reachable(old_mnt, old_mp.mp->m_dentry, &new)) 4650 + return -EINVAL; 4615 4651 /* make certain new is below the root */ 4616 4652 if (!is_path_reachable(new_mnt, new.dentry, &root)) 4617 - goto out4; 4653 + return -EINVAL; 4618 4654 lock_mount_hash(); 4619 4655 umount_mnt(new_mnt); 4620 4656 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { ··· 4630 4672 mnt_notify_add(root_mnt); 4631 4673 mnt_notify_add(new_mnt); 4632 4674 chroot_fs_refs(&root, &new); 4633 - error = 0; 4634 - out4: 4635 - unlock_mount(&old_mp); 4636 - out3: 4637 - path_put(&root); 4638 - out2: 4639 - path_put(&old); 4640 - out1: 4641 - path_put(&new); 4642 - out0: 4643 - return error; 4675 + return 0; 4644 4676 } 4645 4677 4646 4678 static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt) ··· 4720 
4772 4721 4773 if (!mnt_allow_writers(kattr, m)) { 4722 4774 err = mnt_hold_writers(m); 4723 - if (err) 4775 + if (err) { 4776 + m = next_mnt(m, mnt); 4724 4777 break; 4778 + } 4725 4779 } 4726 4780 4727 4781 if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) ··· 4731 4781 } 4732 4782 4733 4783 if (err) { 4734 - struct mount *p; 4735 - 4736 - /* 4737 - * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will 4738 - * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all 4739 - * mounts and needs to take care to include the first mount. 4740 - */ 4741 - for (p = mnt; p; p = next_mnt(p, mnt)) { 4742 - /* If we had to hold writers unblock them. */ 4743 - if (p->mnt.mnt_flags & MNT_WRITE_HOLD) 4744 - mnt_unhold_writers(p); 4745 - 4746 - /* 4747 - * We're done once the first mount we changed got 4748 - * MNT_WRITE_HOLD unset. 4749 - */ 4750 - if (p == m) 4751 - break; 4752 - } 4784 + /* undo all mnt_hold_writers() we'd done */ 4785 + for (struct mount *p = mnt; p != m; p = next_mnt(p, mnt)) 4786 + mnt_unhold_writers(p); 4753 4787 } 4754 4788 return err; 4755 4789 } ··· 4764 4830 WRITE_ONCE(m->mnt.mnt_flags, flags); 4765 4831 4766 4832 /* If we had to hold writers unblock them. */ 4767 - if (m->mnt.mnt_flags & MNT_WRITE_HOLD) 4768 - mnt_unhold_writers(m); 4833 + mnt_unhold_writers(m); 4769 4834 4770 4835 if (kattr->propagation) 4771 4836 change_mnt_propagation(m, kattr->propagation); ··· 4774 4841 touch_mnt_namespace(mnt->mnt_ns); 4775 4842 } 4776 4843 4777 - static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) 4844 + static int do_mount_setattr(const struct path *path, struct mount_kattr *kattr) 4778 4845 { 4779 4846 struct mount *mnt = real_mount(path->mnt); 4780 4847 int err = 0; ··· 5572 5639 STATMOUNT_MNT_UIDMAP | \ 5573 5640 STATMOUNT_MNT_GIDMAP) 5574 5641 5642 + /* locks: namespace_shared */ 5575 5643 static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, 5576 5644 struct mnt_namespace *ns) 5577 5645 { ··· 5819 5885 if (ret) 5820 5886 return ret; 5821 5887 5822 - scoped_guard(rwsem_read, &namespace_sem) 5888 + scoped_guard(namespace_shared) 5823 5889 ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns); 5824 5890 5825 5891 if (!ret) ··· 5840 5906 struct path root; 5841 5907 }; 5842 5908 5909 + /* locks: namespace_shared */ 5843 5910 static ssize_t do_listmount(struct klistmount *kls, bool reverse) 5844 5911 { 5845 5912 struct mnt_namespace *ns = kls->ns; ··· 5975 6040 * We only need to guard against mount topology changes as 5976 6041 * listmount() doesn't care about any mount properties. 
5977 6042 */ 5978 - scoped_guard(rwsem_read, &namespace_sem) 6043 + scoped_guard(namespace_shared) 5979 6044 ret = do_listmount(&kls, (flags & LISTMOUNT_REVERSE)); 5980 6045 if (ret <= 0) 5981 6046 return ret; ··· 6062 6127 { 6063 6128 if (!ns_ref_put(ns)) 6064 6129 return; 6065 - namespace_lock(); 6130 + guard(namespace_excl)(); 6066 6131 emptied_ns = ns; 6067 - lock_mount_hash(); 6132 + guard(mount_writer)(); 6068 6133 umount_tree(ns->root, 0); 6069 - unlock_mount_hash(); 6070 - namespace_unlock(); 6071 6134 } 6072 6135 6073 6136 struct vfsmount *kern_mount(struct file_system_type *type) ··· 6114 6181 bool current_chrooted(void) 6115 6182 { 6116 6183 /* Does the current process have a non-standard root */ 6117 - struct path ns_root; 6118 - struct path fs_root; 6119 - bool chrooted; 6120 - 6121 - /* Find the namespace root */ 6122 - ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt; 6123 - ns_root.dentry = ns_root.mnt->mnt_root; 6124 - path_get(&ns_root); 6125 - while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) 6126 - ; 6184 + struct path fs_root __free(path_put) = {}; 6185 + struct mount *root; 6127 6186 6128 6187 get_fs_root(current->fs, &fs_root); 6129 6188 6130 - chrooted = !path_equal(&fs_root, &ns_root); 6189 + /* Find the namespace root */ 6131 6190 6132 - path_put(&fs_root); 6133 - path_put(&ns_root); 6191 + guard(mount_locked_reader)(); 6134 6192 6135 - return chrooted; 6193 + root = topmost_overmount(current->nsproxy->mnt_ns->root); 6194 + 6195 + return fs_root.mnt != &root->mnt || !path_mounted(&fs_root); 6136 6196 } 6137 6197 6138 6198 static bool mnt_already_visible(struct mnt_namespace *ns, ··· 6134 6208 { 6135 6209 int new_flags = *new_mnt_flags; 6136 6210 struct mount *mnt, *n; 6137 - bool visible = false; 6138 6211 6139 - down_read(&namespace_sem); 6212 + guard(namespace_shared)(); 6140 6213 rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) { 6141 6214 struct mount *child; 6142 6215 int mnt_flags; ··· 6182 6257 /* Preserve the locked attributes */ 6183 6258 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \ 6184 6259 MNT_LOCK_ATIME); 6185 - visible = true; 6186 - goto found; 6260 + return true; 6187 6261 next: ; 6188 6262 } 6189 - found: 6190 - up_read(&namespace_sem); 6191 - return visible; 6263 + return false; 6192 6264 } 6193 6265 6194 6266 static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
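
The fs/namespace.c conversions above lean heavily on the scope-based cleanup helpers from <linux/cleanup.h>: guard()/scoped_guard() pair a lock with an automatic unlock at end of scope, and __free() attaches a destructor to a local variable. A minimal sketch of the pattern, using the stock mutex and kfree classes rather than the mount-specific guard classes (mount_locked_reader, namespace_excl, mount_writer), whose definitions are not part of this diff:

#include <linux/cleanup.h>
#include <linux/err.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct demo { int val; };

static DEFINE_MUTEX(demo_lock);

/* guard(): demo_lock is taken here and dropped on every return path */
static int demo_read(struct demo *d)
{
	guard(mutex)(&demo_lock);
	return d->val;
}

/* scoped_guard(): same idea, with an explicit critical-section block */
static void demo_bump(struct demo *d)
{
	scoped_guard(mutex, &demo_lock)
		d->val++;
}

/* __free(): the allocation is released on early exits; return_ptr()
 * hands ownership to the caller instead */
static struct demo *demo_alloc(int val)
{
	struct demo *d __free(kfree) = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return ERR_PTR(-ENOMEM);
	if (val < 0)
		return ERR_PTR(-EINVAL);	/* kfree(d) runs here */
	d->val = val;
	return_ptr(d);
}

do_new_mount_fc() and finish_automount() above combine the same idea, __free(mntput) on the new mount, with retain_and_null_ptr() so the reference is kept only once do_add_mount() has consumed it.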
+56 -19
fs/pnode.c
··· 29 29 return hlist_entry(p->mnt_slave.next, struct mount, mnt_slave); 30 30 } 31 31 32 + /* locks: namespace_shared && is_mounted(mnt) */ 32 33 static struct mount *get_peer_under_root(struct mount *mnt, 33 34 struct mnt_namespace *ns, 34 35 const struct path *root) ··· 51 50 * Get ID of closest dominating peer group having a representative 52 51 * under the given root. 53 52 * 54 - * Caller must hold namespace_sem 53 + * locks: namespace_shared 55 54 */ 56 55 int get_dominating_id(struct mount *mnt, const struct path *root) 57 56 { ··· 69 68 static inline bool will_be_unmounted(struct mount *m) 70 69 { 71 70 return m->mnt.mnt_flags & MNT_UMOUNT; 72 - } 73 - 74 - static struct mount *propagation_source(struct mount *mnt) 75 - { 76 - do { 77 - struct mount *m; 78 - for (m = next_peer(mnt); m != mnt; m = next_peer(m)) { 79 - if (!will_be_unmounted(m)) 80 - return m; 81 - } 82 - mnt = mnt->mnt_master; 83 - } while (mnt && will_be_unmounted(mnt)); 84 - return mnt; 85 71 } 86 72 87 73 static void transfer_propagation(struct mount *mnt, struct mount *to) ··· 99 111 return; 100 112 } 101 113 if (IS_MNT_SHARED(mnt)) { 102 - if (type == MS_SLAVE || !hlist_empty(&mnt->mnt_slave_list)) 103 - m = propagation_source(mnt); 104 114 if (list_empty(&mnt->mnt_share)) { 105 115 mnt_release_group_id(mnt); 106 116 } else { 117 + m = next_peer(mnt); 107 118 list_del_init(&mnt->mnt_share); 108 119 mnt->mnt_group_id = 0; 109 120 } ··· 120 133 mnt->mnt_t_flags |= T_UNBINDABLE; 121 134 else 122 135 mnt->mnt_t_flags &= ~T_UNBINDABLE; 136 + } 137 + } 138 + 139 + static struct mount *trace_transfers(struct mount *m) 140 + { 141 + while (1) { 142 + struct mount *next = next_peer(m); 143 + 144 + if (next != m) { 145 + list_del_init(&m->mnt_share); 146 + m->mnt_group_id = 0; 147 + m->mnt_master = next; 148 + } else { 149 + if (IS_MNT_SHARED(m)) 150 + mnt_release_group_id(m); 151 + next = m->mnt_master; 152 + } 153 + hlist_del_init(&m->mnt_slave); 154 + CLEAR_MNT_SHARED(m); 155 + SET_MNT_MARK(m); 156 + 157 + if (!next || !will_be_unmounted(next)) 158 + return next; 159 + if (IS_MNT_MARKED(next)) 160 + return next->mnt_master; 161 + m = next; 162 + } 163 + } 164 + 165 + static void set_destinations(struct mount *m, struct mount *master) 166 + { 167 + struct mount *next; 168 + 169 + while ((next = m->mnt_master) != master) { 170 + m->mnt_master = master; 171 + m = next; 172 + } 173 + } 174 + 175 + void bulk_make_private(struct list_head *set) 176 + { 177 + struct mount *m; 178 + 179 + list_for_each_entry(m, set, mnt_list) 180 + if (!IS_MNT_MARKED(m)) 181 + set_destinations(m, trace_transfers(m)); 182 + 183 + list_for_each_entry(m, set, mnt_list) { 184 + transfer_propagation(m, m->mnt_master); 185 + m->mnt_master = NULL; 186 + CLEAR_MNT_MARK(m); 123 187 } 124 188 } 125 189 ··· 342 304 err = PTR_ERR(this); 343 305 break; 344 306 } 345 - read_seqlock_excl(&mount_lock); 346 - mnt_set_mountpoint(n, dest_mp, this); 347 - read_sequnlock_excl(&mount_lock); 307 + scoped_guard(mount_locked_reader) 308 + mnt_set_mountpoint(n, dest_mp, this); 348 309 if (n->mnt_master) 349 310 SET_MNT_MARK(n->mnt_master); 350 311 copy = this;
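
The trace_transfers()/set_destinations()/bulk_make_private() trio added above replaces the old per-victim propagation_source() scan: each master chain is traced to its first surviving mount exactly once, marking nodes along the way, and then path-compressed. That is what turns removing a large set of mounts from the propagation graph into linear work. A self-contained sketch of the two-pass idea, with hypothetical names and none of the peer-group bookkeeping the real code also performs:

#include <linux/types.h>

struct node {
	struct node *master;	/* propagation source; NULL at the top */
	bool doomed;		/* in the set being removed */
	bool marked;		/* chain from here already resolved */
};

/* Pass one: walk the master chain to the first survivor, marking the
 * nodes visited; stop early on an already-resolved (marked) node. */
static struct node *trace(struct node *m)
{
	for (;;) {
		struct node *next = m->master;

		m->marked = true;
		if (!next || !next->doomed)
			return next;		/* survivor, or nothing */
		if (next->marked)
			return next->master;	/* already compressed */
		m = next;
	}
}

/* Pass two: point every doomed node on the chain straight at the
 * survivor, so later traces terminate after one step. */
static void compress(struct node *m, struct node *survivor)
{
	while (m->master != survivor) {
		struct node *next = m->master;

		m->master = survivor;
		m = next;
	}
}

/*
 * Driver, per victim v in the set:
 *
 *	if (!v->marked)
 *		compress(v, trace(v));
 *
 * Every edge is walked O(1) times overall; bulk_make_private() then
 * clears the marks in a final pass once all victims are rerouted.
 */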
+1
fs/pnode.h
··· 42 42 } 43 43 44 44 void change_mnt_propagation(struct mount *, int); 45 + void bulk_make_private(struct list_head *); 45 46 int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, 46 47 struct hlist_head *); 47 48 void propagate_umount(struct list_head *);
+1 -2
fs/super.c
··· 323 323 if (!s) 324 324 return NULL; 325 325 326 - INIT_LIST_HEAD(&s->s_mounts); 327 326 s->s_user_ns = get_user_ns(user_ns); 328 327 init_rwsem(&s->s_umount); 329 328 lockdep_set_class(&s->s_umount, &type->s_umount_key); ··· 407 408 list_del_init(&s->s_list); 408 409 WARN_ON(s->s_dentry_lru.node); 409 410 WARN_ON(s->s_inode_lru.node); 410 - WARN_ON(!list_empty(&s->s_mounts)); 411 + WARN_ON(s->s_mounts); 411 412 call_rcu(&s->rcu, destroy_super_rcu); 412 413 } 413 414 }
+3 -1
include/linux/fs.h
··· 1434 1434 struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; 1435 1435 }; 1436 1436 1437 + struct mount; 1438 + 1437 1439 struct super_block { 1438 1440 struct list_head s_list; /* Keep this first */ 1439 1441 dev_t s_dev; /* search index; _not_ kdev_t */ ··· 1470 1468 __u16 s_encoding_flags; 1471 1469 #endif 1472 1470 struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ 1473 - struct list_head s_mounts; /* list of mounts; _not_ for fs use */ 1471 + struct mount *s_mounts; /* list of mounts; _not_ for fs use */ 1474 1472 struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ 1475 1473 struct file *s_bdev_file; 1476 1474 struct backing_dev_info *s_bdi;
+4 -5
include/linux/mount.h
··· 33 33 MNT_NOSYMFOLLOW = 0x80, 34 34 35 35 MNT_SHRINKABLE = 0x100, 36 - MNT_WRITE_HOLD = 0x200, 37 36 38 37 MNT_INTERNAL = 0x4000, 39 38 ··· 51 52 | MNT_READONLY | MNT_NOSYMFOLLOW, 52 53 MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, 53 54 54 - MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | 55 + MNT_INTERNAL_FLAGS = MNT_INTERNAL | MNT_DOOMED | 55 56 MNT_SYNC_UMOUNT | MNT_LOCKED 56 57 }; 57 58 ··· 76 77 extern struct vfsmount *mntget(struct vfsmount *mnt); 77 78 extern void mnt_make_shortterm(struct vfsmount *mnt); 78 79 extern struct vfsmount *mnt_clone_internal(const struct path *path); 79 - extern bool __mnt_is_readonly(struct vfsmount *mnt); 80 + extern bool __mnt_is_readonly(const struct vfsmount *mnt); 80 81 extern bool mnt_may_suid(struct vfsmount *mnt); 81 82 82 83 extern struct vfsmount *clone_private_mount(const struct path *path); ··· 103 104 extern int may_umount(struct vfsmount *); 104 105 int do_mount(const char *, const char __user *, 105 106 const char *, unsigned long, void *); 106 - extern struct path *collect_paths(const struct path *, struct path *, unsigned); 107 - extern void drop_collected_paths(struct path *, struct path *); 107 + extern const struct path *collect_paths(const struct path *, struct path *, unsigned); 108 + extern void drop_collected_paths(const struct path *, const struct path *); 108 109 extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); 109 110 110 111 extern int cifs_root_data(char **dev, char **opts);
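
MNT_WRITE_HOLD leaving mnt_flags (and MNT_INTERNAL_FLAGS) is the visible side of the WRITE_HOLD rework: holding writers no longer flips a bit that concurrent readers of ->mnt_flags could observe. The underlying handshake is a Dekker-style store/check protocol. Below is a simplified single-counter illustration with hypothetical fields; the kernel tracks writers with per-CPU counts and keeps the hold state inside struct mount:

#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/processor.h>

struct obj {
	atomic_t	writers;	/* active write accesses */
	unsigned long	hold;		/* private word, not a flags bit */
};

/* Block new writers, then check for existing ones.  On -EBUSY the
 * caller backs out with unhold_writers(). */
static int hold_writers(struct obj *o)
{
	WRITE_ONCE(o->hold, 1);
	smp_mb();	/* pairs with the barrier in get_write_access() */
	if (atomic_read(&o->writers) > 0)
		return -EBUSY;
	return 0;
}

/* Safe to call whether or not the hold is currently set. */
static void unhold_writers(struct obj *o)
{
	WRITE_ONCE(o->hold, 0);
}

/* Writer side: publish the write, then back off while held. */
static void get_write_access(struct obj *o)
{
	for (;;) {
		atomic_inc(&o->writers);
		smp_mb();
		if (!READ_ONCE(o->hold))
			return;
		atomic_dec(&o->writers);
		while (READ_ONCE(o->hold))
			cpu_relax();
	}
}

With full barriers on both sides, at least one party observes the other: either hold_writers() sees the writer's increment and fails, or the writer sees the hold and backs off. Making unhold_writers() safe to call when nothing is held is what lets the mount_setattr error paths above invoke mnt_unhold_writers() unconditionally.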
+6 -6
kernel/audit_tree.c
··· 680 680 struct audit_tree *tree; 681 681 struct path path; 682 682 struct audit_node *node; 683 - struct path *paths; 683 + const struct path *paths; 684 684 struct path array[16]; 685 685 int err; 686 686 ··· 703 703 struct audit_chunk *chunk = find_chunk(node); 704 704 /* this could be NULL if the watch is dying else where... */ 705 705 node->index |= 1U<<31; 706 - for (struct path *p = paths; p->dentry; p++) { 706 + for (const struct path *p = paths; p->dentry; p++) { 707 707 struct inode *inode = p->dentry->d_inode; 708 708 if (inode_to_key(inode) == chunk->key) { 709 709 node->index &= ~(1U<<31); ··· 742 742 put_tree(tree); 743 743 } 744 744 745 - static int tag_mounts(struct path *paths, struct audit_tree *tree) 745 + static int tag_mounts(const struct path *paths, struct audit_tree *tree) 746 746 { 747 - for (struct path *p = paths; p->dentry; p++) { 747 + for (const struct path *p = paths; p->dentry; p++) { 748 748 int err = tag_chunk(p->dentry->d_inode, tree); 749 749 if (err) 750 750 return err; ··· 807 807 struct audit_tree *seed = rule->tree, *tree; 808 808 struct path path; 809 809 struct path array[16]; 810 - struct path *paths; 810 + const struct path *paths; 811 811 int err; 812 812 813 813 rule->tree = NULL; ··· 879 879 int failed = 0; 880 880 struct path path1, path2; 881 881 struct path array[16]; 882 - struct path *paths; 882 + const struct path *paths; 883 883 int err; 884 884 885 885 err = kern_path(new, 0, &path2);
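
The kernel/audit_tree.c changes are const-propagation fallout from the collect_paths() prototype change in include/linux/mount.h, but the calling convention they show is worth spelling out: the caller donates a small on-stack array, the callee returns either that array or a heap allocation, and the final entry has a NULL dentry so no count needs to travel alongside. An illustrative reduction of that contract; the real collect_paths() gathers the starting path plus everything mounted on top of it, while this sketch returns only the starting path:

#include <linux/err.h>
#include <linux/path.h>
#include <linux/slab.h>

static const struct path *collect(const struct path *root,
				  struct path *buf, unsigned int n)
{
	struct path *res = buf;

	if (n < 2) {		/* no room for entry + sentinel */
		res = kcalloc(2, sizeof(*res), GFP_KERNEL);
		if (!res)
			return ERR_PTR(-ENOMEM);
	}
	res[0] = *root;
	path_get(&res[0]);
	res[1].mnt = NULL;	/* NULL dentry terminates the array */
	res[1].dentry = NULL;
	return res;
}

static void drop_collected(const struct path *paths, const struct path *buf)
{
	for (const struct path *p = paths; p->dentry; p++)
		path_put(p);
	if (paths != buf)	/* the donated stack buffer is not freed */
		kfree(paths);
}

Callers then look exactly like the audit loops above: declare struct path array[16], iterate the result until p->dentry is NULL, and make a single drop call at the end.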