Merge branches 'work.path' and 'work.mount' into work.f_path

+1 -1

fs/bpf_fs_kfuncs.c

··· 79 79 * pathname in *buf*, including the NUL termination character. On error, a 80 80 * negative integer is returned. 81 81 */ 82 - __bpf_kfunc int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) 82 + __bpf_kfunc int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) 83 83 { 84 84 int len; 85 85 char *ret;

+13 -20

fs/configfs/symlink.c

··· 114 114 } 115 115 116 116 117 - static int get_target(const char *symname, struct path *path, 118 - struct config_item **target, struct super_block *sb) 117 + static int get_target(const char *symname, struct config_item **target, 118 + struct super_block *sb) 119 119 { 120 + struct path path __free(path_put) = {}; 120 121 int ret; 121 122 122 - ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path); 123 - if (!ret) { 124 - if (path->dentry->d_sb == sb) { 125 - *target = configfs_get_config_item(path->dentry); 126 - if (!*target) { 127 - ret = -ENOENT; 128 - path_put(path); 129 - } 130 - } else { 131 - ret = -EPERM; 132 - path_put(path); 133 - } 134 - } 135 - 136 - return ret; 123 + ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); 124 + if (ret) 125 + return ret; 126 + if (path.dentry->d_sb != sb) 127 + return -EPERM; 128 + *target = configfs_get_config_item(path.dentry); 129 + if (!*target) 130 + return -ENOENT; 131 + return 0; 137 132 } 138 133 139 134 ··· 136 141 struct dentry *dentry, const char *symname) 137 142 { 138 143 int ret; 139 - struct path path; 140 144 struct configfs_dirent *sd; 141 145 struct config_item *parent_item; 142 146 struct config_item *target_item = NULL; ··· 182 188 * AV, a thoroughly annoyed bastard. 183 189 */ 184 190 inode_unlock(dir); 185 - ret = get_target(symname, &path, &target_item, dentry->d_sb); 191 + ret = get_target(symname, &target_item, dentry->d_sb); 186 192 inode_lock(dir); 187 193 if (ret) 188 194 goto out_put; ··· 204 210 } 205 211 206 212 config_item_put(target_item); 207 - path_put(&path); 208 213 209 214 out_put: 210 215 config_item_put(parent_item);

+2 -2

fs/dcache.c

··· 1390 1390 unsigned int mounted; 1391 1391 }; 1392 1392 1393 + /* locks: mount_locked_reader && dentry->d_lock */ 1393 1394 static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry) 1394 1395 { 1395 1396 struct check_mount *info = data; ··· 1417 1416 { 1418 1417 struct check_mount data = { .mnt = parent->mnt, .mounted = 0 }; 1419 1418 1420 - read_seqlock_excl(&mount_lock); 1419 + guard(mount_locked_reader)(); 1421 1420 d_walk(parent->dentry, &data, path_check_mount); 1422 - read_sequnlock_excl(&mount_lock); 1423 1421 1424 1422 return data.mounted; 1425 1423 }

+1 -13

fs/ecryptfs/dentry.c

··· 59 59 return rc; 60 60 } 61 61 62 - struct kmem_cache *ecryptfs_dentry_info_cache; 63 - 64 - static void ecryptfs_dentry_free_rcu(struct rcu_head *head) 65 - { 66 - kmem_cache_free(ecryptfs_dentry_info_cache, 67 - container_of(head, struct ecryptfs_dentry_info, rcu)); 68 - } 69 - 70 62 /** 71 63 * ecryptfs_d_release 72 64 * @dentry: The ecryptfs dentry ··· 67 75 */ 68 76 static void ecryptfs_d_release(struct dentry *dentry) 69 77 { 70 - struct ecryptfs_dentry_info *p = dentry->d_fsdata; 71 - if (p) { 72 - path_put(&p->lower_path); 73 - call_rcu(&p->rcu, ecryptfs_dentry_free_rcu); 74 - } 78 + dput(dentry->d_fsdata); 75 79 } 76 80 77 81 const struct dentry_operations ecryptfs_dops = {

+11 -16

fs/ecryptfs/ecryptfs_kernel.h

··· 258 258 struct ecryptfs_crypt_stat crypt_stat; 259 259 }; 260 260 261 - /* dentry private data. Each dentry must keep track of a lower 262 - * vfsmount too. */ 263 - struct ecryptfs_dentry_info { 264 - struct path lower_path; 265 - struct rcu_head rcu; 266 - }; 267 - 268 261 /** 269 262 * ecryptfs_global_auth_tok - A key used to encrypt all new files under the mountpoint 270 263 * @flags: Status flags ··· 341 348 /* superblock private data. */ 342 349 struct ecryptfs_sb_info { 343 350 struct super_block *wsi_sb; 351 + struct vfsmount *lower_mnt; 344 352 struct ecryptfs_mount_crypt_stat mount_crypt_stat; 345 353 }; 346 354 ··· 488 494 } 489 495 490 496 static inline void 491 - ecryptfs_set_dentry_private(struct dentry *dentry, 492 - struct ecryptfs_dentry_info *dentry_info) 497 + ecryptfs_set_dentry_lower(struct dentry *dentry, 498 + struct dentry *lower_dentry) 493 499 { 494 - dentry->d_fsdata = dentry_info; 500 + dentry->d_fsdata = lower_dentry; 495 501 } 496 502 497 503 static inline struct dentry * 498 504 ecryptfs_dentry_to_lower(struct dentry *dentry) 499 505 { 500 - return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry; 506 + return dentry->d_fsdata; 501 507 } 502 508 503 - static inline const struct path * 504 - ecryptfs_dentry_to_lower_path(struct dentry *dentry) 509 + static inline struct path 510 + ecryptfs_lower_path(struct dentry *dentry) 505 511 { 506 - return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path; 512 + return (struct path){ 513 + .mnt = ecryptfs_superblock_to_private(dentry->d_sb)->lower_mnt, 514 + .dentry = ecryptfs_dentry_to_lower(dentry) 515 + }; 507 516 } 508 517 509 518 #define ecryptfs_printk(type, fmt, arg...) \ ··· 529 532 530 533 extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache; 531 534 extern struct kmem_cache *ecryptfs_file_info_cache; 532 - extern struct kmem_cache *ecryptfs_dentry_info_cache; 533 535 extern struct kmem_cache *ecryptfs_inode_info_cache; 534 536 extern struct kmem_cache *ecryptfs_sb_info_cache; 535 537 extern struct kmem_cache *ecryptfs_header_cache; ··· 553 557 size_t *encoded_name_size, 554 558 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 555 559 const char *name, size_t name_size); 556 - struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); 557 560 void ecryptfs_dump_hex(char *data, int bytes); 558 561 int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, 559 562 int sg_size);

+7 -8

fs/ecryptfs/file.c

··· 33 33 struct iov_iter *to) 34 34 { 35 35 ssize_t rc; 36 - const struct path *path; 37 36 struct file *file = iocb->ki_filp; 38 37 39 38 rc = generic_file_read_iter(iocb, to); 40 39 if (rc >= 0) { 41 - path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); 42 - touch_atime(path); 40 + struct path path = ecryptfs_lower_path(file->f_path.dentry); 41 + touch_atime(&path); 43 42 } 44 43 return rc; 45 44 } ··· 58 59 size_t len, unsigned int flags) 59 60 { 60 61 ssize_t rc; 61 - const struct path *path; 62 62 63 63 rc = filemap_splice_read(in, ppos, pipe, len, flags); 64 64 if (rc >= 0) { 65 - path = ecryptfs_dentry_to_lower_path(in->f_path.dentry); 66 - touch_atime(path); 65 + struct path path = ecryptfs_lower_path(in->f_path.dentry); 66 + touch_atime(&path); 67 67 } 68 68 return rc; 69 69 } ··· 281 283 * ecryptfs_lookup() */ 282 284 struct ecryptfs_file_info *file_info; 283 285 struct file *lower_file; 286 + struct path path; 284 287 285 288 /* Released in ecryptfs_release or end of function if failure */ 286 289 file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL); ··· 291 292 "Error attempting to allocate memory\n"); 292 293 return -ENOMEM; 293 294 } 294 - lower_file = dentry_open(ecryptfs_dentry_to_lower_path(ecryptfs_dentry), 295 - file->f_flags, current_cred()); 295 + path = ecryptfs_lower_path(ecryptfs_dentry); 296 + lower_file = dentry_open(&path, file->f_flags, current_cred()); 296 297 if (IS_ERR(lower_file)) { 297 298 printk(KERN_ERR "%s: Error attempting to initialize " 298 299 "the lower file for the dentry with name "

+5 -14

fs/ecryptfs/inode.c

··· 327 327 static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, 328 328 struct dentry *lower_dentry) 329 329 { 330 - const struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent); 330 + struct dentry *lower_parent = ecryptfs_dentry_to_lower(dentry->d_parent); 331 331 struct inode *inode, *lower_inode; 332 - struct ecryptfs_dentry_info *dentry_info; 333 332 int rc = 0; 334 333 335 - dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); 336 - if (!dentry_info) { 337 - dput(lower_dentry); 338 - return ERR_PTR(-ENOMEM); 339 - } 340 - 341 334 fsstack_copy_attr_atime(d_inode(dentry->d_parent), 342 - d_inode(path->dentry)); 335 + d_inode(lower_parent)); 343 336 BUG_ON(!d_count(lower_dentry)); 344 337 345 - ecryptfs_set_dentry_private(dentry, dentry_info); 346 - dentry_info->lower_path.mnt = mntget(path->mnt); 347 - dentry_info->lower_path.dentry = lower_dentry; 338 + ecryptfs_set_dentry_lower(dentry, lower_dentry); 348 339 349 340 /* 350 341 * negative dentry can go positive under us here - its parent is not ··· 1013 1022 { 1014 1023 struct dentry *dentry = path->dentry; 1015 1024 struct kstat lower_stat; 1025 + struct path lower_path = ecryptfs_lower_path(dentry); 1016 1026 int rc; 1017 1027 1018 - rc = vfs_getattr_nosec(ecryptfs_dentry_to_lower_path(dentry), 1019 - &lower_stat, request_mask, flags); 1028 + rc = vfs_getattr_nosec(&lower_path, &lower_stat, request_mask, flags); 1020 1029 if (!rc) { 1021 1030 fsstack_copy_attr_all(d_inode(dentry), 1022 1031 ecryptfs_inode_to_lower(d_inode(dentry)));

+6 -18

fs/ecryptfs/main.c

··· 106 106 struct file **lower_file) 107 107 { 108 108 const struct cred *cred = current_cred(); 109 - const struct path *path = ecryptfs_dentry_to_lower_path(dentry); 109 + struct path path = ecryptfs_lower_path(dentry); 110 110 int rc; 111 111 112 - rc = ecryptfs_privileged_open(lower_file, path->dentry, path->mnt, 113 - cred); 112 + rc = ecryptfs_privileged_open(lower_file, path.dentry, path.mnt, cred); 114 113 if (rc) { 115 114 printk(KERN_ERR "Error opening lower file " 116 115 "for lower_dentry [0x%p] and lower_mnt [0x%p]; " 117 - "rc = [%d]\n", path->dentry, path->mnt, rc); 116 + "rc = [%d]\n", path.dentry, path.mnt, rc); 118 117 (*lower_file) = NULL; 119 118 } 120 119 return rc; ··· 436 437 struct ecryptfs_fs_context *ctx = fc->fs_private; 437 438 struct ecryptfs_sb_info *sbi = fc->s_fs_info; 438 439 struct ecryptfs_mount_crypt_stat *mount_crypt_stat; 439 - struct ecryptfs_dentry_info *root_info; 440 440 const char *err = "Getting sb failed"; 441 441 struct inode *inode; 442 442 struct path path; ··· 541 543 goto out_free; 542 544 } 543 545 544 - rc = -ENOMEM; 545 - root_info = kmem_cache_zalloc(ecryptfs_dentry_info_cache, GFP_KERNEL); 546 - if (!root_info) 547 - goto out_free; 548 - 549 - /* ->kill_sb() will take care of root_info */ 550 - ecryptfs_set_dentry_private(s->s_root, root_info); 551 - root_info->lower_path = path; 546 + ecryptfs_set_dentry_lower(s->s_root, path.dentry); 547 + ecryptfs_superblock_to_private(s)->lower_mnt = path.mnt; 552 548 553 549 s->s_flags |= SB_ACTIVE; 554 550 fc->root = dget(s->s_root); ··· 572 580 kill_anon_super(sb); 573 581 if (!sb_info) 574 582 return; 583 + mntput(sb_info->lower_mnt); 575 584 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); 576 585 kmem_cache_free(ecryptfs_sb_info_cache, sb_info); 577 586 } ··· 659 666 .cache = &ecryptfs_file_info_cache, 660 667 .name = "ecryptfs_file_cache", 661 668 .size = sizeof(struct ecryptfs_file_info), 662 - }, 663 - { 664 - .cache = &ecryptfs_dentry_info_cache, 665 - .name = "ecryptfs_dentry_info_cache", 666 - .size = sizeof(struct ecryptfs_dentry_info), 667 669 }, 668 670 { 669 671 .cache = &ecryptfs_inode_info_cache,

+1 -1

fs/file_table.c

··· 54 54 55 55 #define backing_file(f) container_of(f, struct backing_file, file) 56 56 57 - struct path *backing_file_user_path(const struct file *f) 57 + const struct path *backing_file_user_path(const struct file *f) 58 58 { 59 59 return &backing_file(f)->user_path; 60 60 }

+3 -3

fs/internal.h

··· 53 53 * namei.c 54 54 */ 55 55 extern int filename_lookup(int dfd, struct filename *name, unsigned flags, 56 - struct path *path, struct path *root); 56 + struct path *path, const struct path *root); 57 57 int do_rmdir(int dfd, struct filename *name); 58 58 int do_unlinkat(int dfd, struct filename *name); 59 59 int may_linkat(struct mnt_idmap *idmap, const struct path *link); ··· 84 84 extern void dissolve_on_fput(struct vfsmount *); 85 85 extern bool may_mount(void); 86 86 87 - int path_mount(const char *dev_name, struct path *path, 87 + int path_mount(const char *dev_name, const struct path *path, 88 88 const char *type_page, unsigned long flags, void *data_page); 89 - int path_umount(struct path *path, int flags); 89 + int path_umount(const struct path *path, int flags); 90 90 91 91 int show_path(struct seq_file *m, struct dentry *root); 92 92

+38 -1

fs/mount.h

··· 64 64 #endif 65 65 struct list_head mnt_mounts; /* list of children, anchored here */ 66 66 struct list_head mnt_child; /* and going through their mnt_child */ 67 - struct list_head mnt_instance; /* mount instance on sb->s_mounts */ 67 + struct mount *mnt_next_for_sb; /* the next two fields are hlist_node, */ 68 + struct mount * __aligned(1) *mnt_pprev_for_sb; 69 + /* except that LSB of pprev is stolen */ 70 + #define WRITE_HOLD 1 /* ... for use by mnt_hold_writers() */ 68 71 const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ 69 72 struct list_head mnt_list; 70 73 struct list_head mnt_expire; /* link in fs-specific expiry list */ ··· 157 154 158 155 extern seqlock_t mount_lock; 159 156 157 + DEFINE_LOCK_GUARD_0(mount_writer, write_seqlock(&mount_lock), 158 + write_sequnlock(&mount_lock)) 159 + DEFINE_LOCK_GUARD_0(mount_locked_reader, read_seqlock_excl(&mount_lock), 160 + read_sequnlock_excl(&mount_lock)) 161 + 160 162 struct proc_mounts { 161 163 struct mnt_namespace *ns; 162 164 struct path root; ··· 237 229 { 238 230 } 239 231 #endif 232 + 233 + static inline struct mount *topmost_overmount(struct mount *m) 234 + { 235 + while (m->overmount) 236 + m = m->overmount; 237 + return m; 238 + } 239 + 240 + static inline bool __test_write_hold(struct mount * __aligned(1) *val) 241 + { 242 + return (unsigned long)val & WRITE_HOLD; 243 + } 244 + 245 + static inline bool test_write_hold(const struct mount *m) 246 + { 247 + return __test_write_hold(m->mnt_pprev_for_sb); 248 + } 249 + 250 + static inline void set_write_hold(struct mount *m) 251 + { 252 + m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb 253 + | WRITE_HOLD); 254 + } 255 + 256 + static inline void clear_write_hold(struct mount *m) 257 + { 258 + m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb 259 + & ~WRITE_HOLD); 260 + } 240 261 241 262 struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);

+2 -2

fs/namei.c

··· 2673 2673 } 2674 2674 2675 2675 int filename_lookup(int dfd, struct filename *name, unsigned flags, 2676 - struct path *path, struct path *root) 2676 + struct path *path, const struct path *root) 2677 2677 { 2678 2678 int retval; 2679 2679 struct nameidata nd; ··· 4170 4170 } 4171 4171 EXPORT_SYMBOL(kern_path_create); 4172 4172 4173 - void done_path_create(struct path *path, struct dentry *dentry) 4173 + void done_path_create(const struct path *path, struct dentry *dentry) 4174 4174 { 4175 4175 if (!IS_ERR(dentry)) 4176 4176 dput(dentry);

+466 -544

fs/namespace.c

··· 82 82 static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */ 83 83 static DEFINE_SEQLOCK(mnt_ns_tree_lock); 84 84 85 + static inline void namespace_lock(void); 86 + static void namespace_unlock(void); 87 + DEFINE_LOCK_GUARD_0(namespace_excl, namespace_lock(), namespace_unlock()) 88 + DEFINE_LOCK_GUARD_0(namespace_shared, down_read(&namespace_sem), 89 + up_read(&namespace_sem)) 90 + 91 + DEFINE_FREE(mntput, struct vfsmount *, if (!IS_ERR(_T)) mntput(_T)) 92 + 85 93 #ifdef CONFIG_FSNOTIFY 86 94 LIST_HEAD(notify_list); /* protected by namespace_sem */ 87 95 #endif ··· 195 187 static void mnt_ns_tree_remove(struct mnt_namespace *ns) 196 188 { 197 189 /* remove from global mount namespace list */ 198 - if (!is_anon_ns(ns)) { 190 + if (!RB_EMPTY_NODE(&ns->mnt_ns_tree_node)) { 199 191 mnt_ns_tree_write_lock(); 200 192 rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree); 201 193 list_bidir_del_rcu(&ns->mnt_ns_list); ··· 428 420 * mnt_want/drop_write() will _keep_ the filesystem 429 421 * r/w. 430 422 */ 431 - bool __mnt_is_readonly(struct vfsmount *mnt) 423 + bool __mnt_is_readonly(const struct vfsmount *mnt) 432 424 { 433 425 return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb); 434 426 } ··· 468 460 #endif 469 461 } 470 462 471 - static int mnt_is_readonly(struct vfsmount *mnt) 463 + static int mnt_is_readonly(const struct vfsmount *mnt) 472 464 { 473 465 if (READ_ONCE(mnt->mnt_sb->s_readonly_remount)) 474 466 return 1; ··· 509 501 mnt_inc_writers(mnt); 510 502 /* 511 503 * The store to mnt_inc_writers must be visible before we pass 512 - * MNT_WRITE_HOLD loop below, so that the slowpath can see our 513 - * incremented count after it has set MNT_WRITE_HOLD. 504 + * WRITE_HOLD loop below, so that the slowpath can see our 505 + * incremented count after it has set WRITE_HOLD. 514 506 */ 515 507 smp_mb(); 516 508 might_lock(&mount_lock.lock); 517 - while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { 509 + while (__test_write_hold(READ_ONCE(mnt->mnt_pprev_for_sb))) { 518 510 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { 519 511 cpu_relax(); 520 512 } else { 521 513 /* 522 514 * This prevents priority inversion, if the task 523 - * setting MNT_WRITE_HOLD got preempted on a remote 515 + * setting WRITE_HOLD got preempted on a remote 524 516 * CPU, and it prevents life lock if the task setting 525 - * MNT_WRITE_HOLD has a lower priority and is bound to 517 + * WRITE_HOLD has a lower priority and is bound to 526 518 * the same CPU as the task that is spinning here. 527 519 */ 528 520 preempt_enable(); 529 - lock_mount_hash(); 530 - unlock_mount_hash(); 521 + read_seqlock_excl(&mount_lock); 522 + read_sequnlock_excl(&mount_lock); 531 523 preempt_disable(); 532 524 } 533 525 } 534 526 /* 535 527 * The barrier pairs with the barrier sb_start_ro_state_change() making 536 - * sure that if we see MNT_WRITE_HOLD cleared, we will also see 528 + * sure that if we see WRITE_HOLD cleared, we will also see 537 529 * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in 538 530 * mnt_is_readonly() and bail in case we are racing with remount 539 531 * read-only. ··· 671 663 * a call to mnt_unhold_writers() in order to stop preventing write access to 672 664 * @mnt. 673 665 * 674 - * Context: This function expects lock_mount_hash() to be held serializing 675 - * setting MNT_WRITE_HOLD. 666 + * Context: This function expects to be in mount_locked_reader scope serializing 667 + * setting WRITE_HOLD. 676 668 * Return: On success 0 is returned. 677 669 * On error, -EBUSY is returned. 678 670 */ 679 671 static inline int mnt_hold_writers(struct mount *mnt) 680 672 { 681 - mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; 673 + set_write_hold(mnt); 682 674 /* 683 - * After storing MNT_WRITE_HOLD, we'll read the counters. This store 675 + * After storing WRITE_HOLD, we'll read the counters. This store 684 676 * should be visible before we do. 685 677 */ 686 678 smp_mb(); ··· 696 688 * sum up each counter, if we read a counter before it is incremented, 697 689 * but then read another CPU's count which it has been subsequently 698 690 * decremented from -- we would see more decrements than we should. 699 - * MNT_WRITE_HOLD protects against this scenario, because 691 + * WRITE_HOLD protects against this scenario, because 700 692 * mnt_want_write first increments count, then smp_mb, then spins on 701 - * MNT_WRITE_HOLD, so it can't be decremented by another CPU while 693 + * WRITE_HOLD, so it can't be decremented by another CPU while 702 694 * we're counting up here. 703 695 */ 704 696 if (mnt_get_writers(mnt) > 0) ··· 714 706 * Stop preventing write access to @mnt allowing callers to gain write access 715 707 * to @mnt again. 716 708 * 717 - * This function can only be called after a successful call to 718 - * mnt_hold_writers(). 709 + * This function can only be called after a call to mnt_hold_writers(). 719 710 * 720 - * Context: This function expects lock_mount_hash() to be held. 711 + * Context: This function expects to be in the same mount_locked_reader scope 712 + * as the matching mnt_hold_writers(). 721 713 */ 722 714 static inline void mnt_unhold_writers(struct mount *mnt) 723 715 { 716 + if (!test_write_hold(mnt)) 717 + return; 724 718 /* 725 - * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers 719 + * MNT_READONLY must become visible before ~WRITE_HOLD, so writers 726 720 * that become unheld will see MNT_READONLY. 727 721 */ 728 722 smp_wmb(); 729 - mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; 723 + clear_write_hold(mnt); 724 + } 725 + 726 + static inline void mnt_del_instance(struct mount *m) 727 + { 728 + struct mount **p = m->mnt_pprev_for_sb; 729 + struct mount *next = m->mnt_next_for_sb; 730 + 731 + if (next) 732 + next->mnt_pprev_for_sb = p; 733 + *p = next; 734 + } 735 + 736 + static inline void mnt_add_instance(struct mount *m, struct super_block *s) 737 + { 738 + struct mount *first = s->s_mounts; 739 + 740 + if (first) 741 + first->mnt_pprev_for_sb = &m->mnt_next_for_sb; 742 + m->mnt_next_for_sb = first; 743 + m->mnt_pprev_for_sb = &s->s_mounts; 744 + s->s_mounts = m; 730 745 } 731 746 732 747 static int mnt_make_readonly(struct mount *mnt) ··· 765 734 766 735 int sb_prepare_remount_readonly(struct super_block *sb) 767 736 { 768 - struct mount *mnt; 769 737 int err = 0; 770 738 771 - /* Racy optimization. Recheck the counter under MNT_WRITE_HOLD */ 739 + /* Racy optimization. Recheck the counter under WRITE_HOLD */ 772 740 if (atomic_long_read(&sb->s_remove_count)) 773 741 return -EBUSY; 774 742 775 - lock_mount_hash(); 776 - list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { 777 - if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { 778 - err = mnt_hold_writers(mnt); 743 + guard(mount_locked_reader)(); 744 + 745 + for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) { 746 + if (!(m->mnt.mnt_flags & MNT_READONLY)) { 747 + err = mnt_hold_writers(m); 779 748 if (err) 780 749 break; 781 750 } ··· 785 754 786 755 if (!err) 787 756 sb_start_ro_state_change(sb); 788 - list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { 789 - if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) 790 - mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; 757 + for (struct mount *m = sb->s_mounts; m; m = m->mnt_next_for_sb) { 758 + if (test_write_hold(m)) 759 + clear_write_hold(m); 791 760 } 792 - unlock_mount_hash(); 793 761 794 762 return err; 795 763 } ··· 847 817 } 848 818 849 819 /** 850 - * __lookup_mnt - find first child mount 820 + * __lookup_mnt - mount hash lookup 851 821 * @mnt: parent mount 852 - * @dentry: mountpoint 822 + * @dentry: dentry of mountpoint 853 823 * 854 - * If @mnt has a child mount @c mounted @dentry find and return it. 824 + * If @mnt has a child mount @c mounted on @dentry find and return it. 825 + * Caller must either hold the spinlock component of @mount_lock or 826 + * hold rcu_read_lock(), sample the seqcount component before the call 827 + * and recheck it afterwards. 855 828 * 856 - * Note that the child mount @c need not be unique. There are cases 857 - * where shadow mounts are created. For example, during mount 858 - * propagation when a source mount @mnt whose root got overmounted by a 859 - * mount @o after path lookup but before @namespace_sem could be 860 - * acquired gets copied and propagated. So @mnt gets copied including 861 - * @o. When @mnt is propagated to a destination mount @d that already 862 - * has another mount @n mounted at the same mountpoint then the source 863 - * mount @mnt will be tucked beneath @n, i.e., @n will be mounted on 864 - * @mnt and @mnt mounted on @d. Now both @n and @o are mounted at @mnt 865 - * on @dentry. 866 - * 867 - * Return: The first child of @mnt mounted @dentry or NULL. 829 + * Return: The child of @mnt mounted on @dentry or %NULL. 868 830 */ 869 831 struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) 870 832 { ··· 869 847 return NULL; 870 848 } 871 849 872 - /* 873 - * lookup_mnt - Return the first child mount mounted at path 850 + /** 851 + * lookup_mnt - Return the child mount mounted at given location 852 + * @path: location in the namespace 874 853 * 875 - * "First" means first mounted chronologically. If you create the 876 - * following mounts: 877 - * 878 - * mount /dev/sda1 /mnt 879 - * mount /dev/sda2 /mnt 880 - * mount /dev/sda3 /mnt 881 - * 882 - * Then lookup_mnt() on the base /mnt dentry in the root mount will 883 - * return successively the root dentry and vfsmount of /dev/sda1, then 884 - * /dev/sda2, then /dev/sda3, then NULL. 885 - * 886 - * lookup_mnt takes a reference to the found vfsmount. 854 + * Acquires and returns a new reference to mount at given location 855 + * or %NULL if nothing is mounted there. 887 856 */ 888 857 struct vfsmount *lookup_mnt(const struct path *path) 889 858 { ··· 911 898 { 912 899 struct mnt_namespace *ns = current->nsproxy->mnt_ns; 913 900 struct mount *mnt, *n; 914 - bool is_covered = false; 915 901 916 - down_read(&namespace_sem); 917 - rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) { 918 - is_covered = (mnt->mnt_mountpoint == dentry); 919 - if (is_covered) 920 - break; 921 - } 922 - up_read(&namespace_sem); 902 + guard(namespace_shared)(); 923 903 924 - return is_covered; 904 + rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) 905 + if (mnt->mnt_mountpoint == dentry) 906 + return true; 907 + 908 + return false; 925 909 } 926 910 927 911 struct pinned_mountpoint { 928 912 struct hlist_node node; 929 913 struct mountpoint *mp; 914 + struct mount *parent; 930 915 }; 931 916 932 917 static bool lookup_mountpoint(struct dentry *dentry, struct pinned_mountpoint *m) ··· 1015 1004 } 1016 1005 } 1017 1006 1018 - static inline int check_mnt(struct mount *mnt) 1007 + static inline int check_mnt(const struct mount *mnt) 1019 1008 { 1020 1009 return mnt->mnt_ns == current->nsproxy->mnt_ns; 1021 1010 } ··· 1217 1206 touch_mnt_namespace(n); 1218 1207 } 1219 1208 1209 + static void setup_mnt(struct mount *m, struct dentry *root) 1210 + { 1211 + struct super_block *s = root->d_sb; 1212 + 1213 + atomic_inc(&s->s_active); 1214 + m->mnt.mnt_sb = s; 1215 + m->mnt.mnt_root = dget(root); 1216 + m->mnt_mountpoint = m->mnt.mnt_root; 1217 + m->mnt_parent = m; 1218 + 1219 + guard(mount_locked_reader)(); 1220 + mnt_add_instance(m, s); 1221 + } 1222 + 1220 1223 /** 1221 1224 * vfs_create_mount - Create a mount for a configured superblock 1222 1225 * @fc: The configuration context with the superblock attached ··· 1254 1229 if (fc->sb_flags & SB_KERNMOUNT) 1255 1230 mnt->mnt.mnt_flags = MNT_INTERNAL; 1256 1231 1257 - atomic_inc(&fc->root->d_sb->s_active); 1258 - mnt->mnt.mnt_sb = fc->root->d_sb; 1259 - mnt->mnt.mnt_root = dget(fc->root); 1260 - mnt->mnt_mountpoint = mnt->mnt.mnt_root; 1261 - mnt->mnt_parent = mnt; 1232 + setup_mnt(mnt, fc->root); 1262 1233 1263 - lock_mount_hash(); 1264 - list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); 1265 - unlock_mount_hash(); 1266 1234 return &mnt->mnt; 1267 1235 } 1268 1236 EXPORT_SYMBOL(vfs_create_mount); ··· 1313 1295 static struct mount *clone_mnt(struct mount *old, struct dentry *root, 1314 1296 int flag) 1315 1297 { 1316 - struct super_block *sb = old->mnt.mnt_sb; 1317 1298 struct mount *mnt; 1318 1299 int err; 1319 1300 ··· 1337 1320 if (mnt->mnt_group_id) 1338 1321 set_mnt_shared(mnt); 1339 1322 1340 - atomic_inc(&sb->s_active); 1341 1323 mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt)); 1342 1324 1343 - mnt->mnt.mnt_sb = sb; 1344 - mnt->mnt.mnt_root = dget(root); 1345 - mnt->mnt_mountpoint = mnt->mnt.mnt_root; 1346 - mnt->mnt_parent = mnt; 1347 - lock_mount_hash(); 1348 - list_add_tail(&mnt->mnt_instance, &sb->s_mounts); 1349 - unlock_mount_hash(); 1325 + setup_mnt(mnt, root); 1350 1326 1351 1327 if (flag & CL_PRIVATE) // we are done with it 1352 1328 return mnt; ··· 1445 1435 mnt->mnt.mnt_flags |= MNT_DOOMED; 1446 1436 rcu_read_unlock(); 1447 1437 1448 - list_del(&mnt->mnt_instance); 1438 + mnt_del_instance(mnt); 1449 1439 if (unlikely(!list_empty(&mnt->mnt_expire))) 1450 1440 list_del(&mnt->mnt_expire); 1451 1441 ··· 1786 1776 down_write(&namespace_sem); 1787 1777 } 1788 1778 1789 - DEFINE_GUARD(namespace_lock, struct rw_semaphore *, namespace_lock(), namespace_unlock()) 1790 - 1791 1779 enum umount_tree_flags { 1792 1780 UMOUNT_SYNC = 1, 1793 1781 UMOUNT_PROPAGATE = 2, ··· 1850 1842 if (how & UMOUNT_PROPAGATE) 1851 1843 propagate_umount(&tmp_list); 1852 1844 1845 + bulk_make_private(&tmp_list); 1846 + 1853 1847 while (!list_empty(&tmp_list)) { 1854 1848 struct mnt_namespace *ns; 1855 1849 bool disconnect; ··· 1876 1866 umount_mnt(p); 1877 1867 } 1878 1868 } 1879 - change_mnt_propagation(p, MS_PRIVATE); 1880 1869 if (disconnect) 1881 1870 hlist_add_head(&p->mnt_umount, &unmounted); 1882 1871 ··· 2035 2026 struct pinned_mountpoint mp = {}; 2036 2027 struct mount *mnt; 2037 2028 2038 - namespace_lock(); 2039 - lock_mount_hash(); 2029 + guard(namespace_excl)(); 2030 + guard(mount_writer)(); 2031 + 2040 2032 if (!lookup_mountpoint(dentry, &mp)) 2041 - goto out_unlock; 2033 + return; 2042 2034 2043 2035 event++; 2044 2036 while (mp.node.next) { ··· 2051 2041 else umount_tree(mnt, UMOUNT_CONNECTED); 2052 2042 } 2053 2043 unpin_mountpoint(&mp); 2054 - out_unlock: 2055 - unlock_mount_hash(); 2056 - namespace_unlock(); 2057 2044 } 2058 2045 2059 2046 /* ··· 2089 2082 } 2090 2083 2091 2084 // caller is responsible for flags being sane 2092 - int path_umount(struct path *path, int flags) 2085 + int path_umount(const struct path *path, int flags) 2093 2086 { 2094 2087 struct mount *mnt = real_mount(path->mnt); 2095 2088 int ret; ··· 2305 2298 return p; 2306 2299 } 2307 2300 2308 - struct path *collect_paths(const struct path *path, 2301 + const struct path *collect_paths(const struct path *path, 2309 2302 struct path *prealloc, unsigned count) 2310 2303 { 2311 2304 struct mount *root = real_mount(path->mnt); ··· 2313 2306 struct path *res = prealloc, *to_free = NULL; 2314 2307 unsigned n = 0; 2315 2308 2316 - guard(rwsem_read)(&namespace_sem); 2309 + guard(namespace_shared)(); 2317 2310 2318 2311 if (!check_mnt(root)) 2319 2312 return ERR_PTR(-EINVAL); ··· 2339 2332 return res; 2340 2333 } 2341 2334 2342 - void drop_collected_paths(struct path *paths, struct path *prealloc) 2335 + void drop_collected_paths(const struct path *paths, const struct path *prealloc) 2343 2336 { 2344 - for (struct path *p = paths; p->mnt; p++) 2337 + for (const struct path *p = paths; p->mnt; p++) 2345 2338 path_put(p); 2346 2339 if (paths != prealloc) 2347 2340 kfree(paths); ··· 2368 2361 return; 2369 2362 } 2370 2363 2371 - scoped_guard(namespace_lock, &namespace_sem) { 2364 + scoped_guard(namespace_excl) { 2372 2365 if (!anon_ns_root(m)) 2373 2366 return; 2374 2367 ··· 2379 2372 } 2380 2373 } 2381 2374 2375 + /* locks: namespace_shared && pinned(mnt) || mount_locked_reader */ 2382 2376 static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) 2383 2377 { 2384 2378 struct mount *child; ··· 2396 2388 2397 2389 bool has_locked_children(struct mount *mnt, struct dentry *dentry) 2398 2390 { 2399 - bool res; 2400 - 2401 - read_seqlock_excl(&mount_lock); 2402 - res = __has_locked_children(mnt, dentry); 2403 - read_sequnlock_excl(&mount_lock); 2404 - return res; 2391 + guard(mount_locked_reader)(); 2392 + return __has_locked_children(mnt, dentry); 2405 2393 } 2406 2394 2407 2395 /* ··· 2405 2401 * specified subtree. Such references can act as pins for mount namespaces 2406 2402 * that aren't checked by the mount-cycle checking code, thereby allowing 2407 2403 * cycles to be made. 2404 + * 2405 + * locks: mount_locked_reader || namespace_shared && pinned(subtree) 2408 2406 */ 2409 2407 static bool check_for_nsfs_mounts(struct mount *subtree) 2410 2408 { 2411 - struct mount *p; 2412 - bool ret = false; 2413 - 2414 - lock_mount_hash(); 2415 - for (p = subtree; p; p = next_mnt(p, subtree)) 2409 + for (struct mount *p = subtree; p; p = next_mnt(p, subtree)) 2416 2410 if (mnt_ns_loop(p->mnt.mnt_root)) 2417 - goto out; 2418 - 2419 - ret = true; 2420 - out: 2421 - unlock_mount_hash(); 2422 - return ret; 2411 + return false; 2412 + return true; 2423 2413 } 2424 2414 2425 2415 /** ··· 2433 2435 struct mount *old_mnt = real_mount(path->mnt); 2434 2436 struct mount *new_mnt; 2435 2437 2436 - guard(rwsem_read)(&namespace_sem); 2438 + guard(namespace_shared)(); 2437 2439 2438 2440 if (IS_MNT_UNBINDABLE(old_mnt)) 2439 2441 return ERR_PTR(-EINVAL); ··· 2554 2556 /** 2555 2557 * attach_recursive_mnt - attach a source mount tree 2556 2558 * @source_mnt: mount tree to be attached 2557 - * @dest_mnt: mount that @source_mnt will be mounted on 2558 - * @dest_mp: the mountpoint @source_mnt will be mounted at 2559 + * @dest: the context for mounting at the place where the tree should go 2559 2560 * 2560 2561 * NOTE: in the table below explains the semantics when a source mount 2561 2562 * of a given type is attached to a destination mount of a given type. ··· 2617 2620 * Otherwise a negative error code is returned. 2618 2621 */ 2619 2622 static int attach_recursive_mnt(struct mount *source_mnt, 2620 - struct mount *dest_mnt, 2621 - struct mountpoint *dest_mp) 2623 + const struct pinned_mountpoint *dest) 2622 2624 { 2623 2625 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; 2626 + struct mount *dest_mnt = dest->parent; 2627 + struct mountpoint *dest_mp = dest->mp; 2624 2628 HLIST_HEAD(tree_list); 2625 2629 struct mnt_namespace *ns = dest_mnt->mnt_ns; 2626 2630 struct pinned_mountpoint root = {}; ··· 2701 2703 child->mnt_mountpoint); 2702 2704 commit_tree(child); 2703 2705 if (q) { 2706 + struct mount *r = topmost_overmount(child); 2704 2707 struct mountpoint *mp = root.mp; 2705 - struct mount *r = child; 2706 - while (unlikely(r->overmount)) 2707 - r = r->overmount; 2708 + 2708 2709 if (unlikely(shorter) && child != source_mnt) 2709 2710 mp = shorter; 2710 2711 mnt_change_mountpoint(r, mp, q); ··· 2732 2735 return err; 2733 2736 } 2734 2737 2735 - /** 2736 - * do_lock_mount - lock mount and mountpoint 2737 - * @path: target path 2738 - * @beneath: whether the intention is to mount beneath @path 2739 - * 2740 - * Follow the mount stack on @path until the top mount @mnt is found. If 2741 - * the initial @path->{mnt,dentry} is a mountpoint lookup the first 2742 - * mount stacked on top of it. Then simply follow @{mnt,mnt->mnt_root} 2743 - * until nothing is stacked on top of it anymore. 2744 - * 2745 - * Acquire the inode_lock() on the top mount's ->mnt_root to protect 2746 - * against concurrent removal of the new mountpoint from another mount 2747 - * namespace. 2748 - * 2749 - * If @beneath is requested, acquire inode_lock() on @mnt's mountpoint 2750 - * @mp on @mnt->mnt_parent must be acquired. This protects against a 2751 - * concurrent unlink of @mp->mnt_dentry from another mount namespace 2752 - * where @mnt doesn't have a child mount mounted @mp. A concurrent 2753 - * removal of @mnt->mnt_root doesn't matter as nothing will be mounted 2754 - * on top of it for @beneath. 2755 - * 2756 - * In addition, @beneath needs to make sure that @mnt hasn't been 2757 - * unmounted or moved from its current mountpoint in between dropping 2758 - * @mount_lock and acquiring @namespace_sem. For the !@beneath case @mnt 2759 - * being unmounted would be detected later by e.g., calling 2760 - * check_mnt(mnt) in the function it's called from. For the @beneath 2761 - * case however, it's useful to detect it directly in do_lock_mount(). 2762 - * If @mnt hasn't been unmounted then @mnt->mnt_mountpoint still points 2763 - * to @mnt->mnt_mp->m_dentry. But if @mnt has been unmounted it will 2764 - * point to @mnt->mnt_root and @mnt->mnt_mp will be NULL. 2765 - * 2766 - * Return: Either the target mountpoint on the top mount or the top 2767 - * mount's mountpoint. 2768 - */ 2769 - static int do_lock_mount(struct path *path, struct pinned_mountpoint *pinned, bool beneath) 2738 + static inline struct mount *where_to_mount(const struct path *path, 2739 + struct dentry **dentry, 2740 + bool beneath) 2770 2741 { 2771 - struct vfsmount *mnt = path->mnt; 2772 - struct dentry *dentry; 2773 - struct path under = {}; 2774 - int err = -ENOENT; 2742 + struct mount *m; 2775 2743 2776 - for (;;) { 2777 - struct mount *m = real_mount(mnt); 2744 + if (unlikely(beneath)) { 2745 + m = topmost_overmount(real_mount(path->mnt)); 2746 + *dentry = m->mnt_mountpoint; 2747 + return m->mnt_parent; 2748 + } 2749 + m = __lookup_mnt(path->mnt, path->dentry); 2750 + if (unlikely(m)) { 2751 + m = topmost_overmount(m); 2752 + *dentry = m->mnt.mnt_root; 2753 + return m; 2754 + } 2755 + *dentry = path->dentry; 2756 + return real_mount(path->mnt); 2757 + } 2778 2758 2779 - if (beneath) { 2780 - path_put(&under); 2781 - read_seqlock_excl(&mount_lock); 2782 - under.mnt = mntget(&m->mnt_parent->mnt); 2783 - under.dentry = dget(m->mnt_mountpoint); 2784 - read_sequnlock_excl(&mount_lock); 2785 - dentry = under.dentry; 2786 - } else { 2787 - dentry = path->dentry; 2759 + /** 2760 + * do_lock_mount - acquire environment for mounting 2761 + * @path: target path 2762 + * @res: context to set up 2763 + * @beneath: whether the intention is to mount beneath @path 2764 + * 2765 + * To mount something at given location, we need 2766 + * namespace_sem locked exclusive 2767 + * inode of dentry we are mounting on locked exclusive 2768 + * struct mountpoint for that dentry 2769 + * struct mount we are mounting on 2770 + * 2771 + * Results are stored in caller-supplied context (pinned_mountpoint); 2772 + * on success we have res->parent and res->mp pointing to parent and 2773 + * mountpoint respectively and res->node inserted into the ->m_list 2774 + * of the mountpoint, making sure the mountpoint won't disappear. 2775 + * On failure we have res->parent set to ERR_PTR(-E...), res->mp 2776 + * left NULL, res->node - empty. 2777 + * In case of success do_lock_mount returns with locks acquired (in 2778 + * proper order - inode lock nests outside of namespace_sem). 2779 + * 2780 + * Request to mount on overmounted location is treated as "mount on 2781 + * top of whatever's overmounting it"; request to mount beneath 2782 + * a location - "mount immediately beneath the topmost mount at that 2783 + * place". 2784 + * 2785 + * In all cases the location must not have been unmounted and the 2786 + * chosen mountpoint must be allowed to be mounted on. For "beneath" 2787 + * case we also require the location to be at the root of a mount 2788 + * that has a parent (i.e. is not a root of some namespace). 2789 + */ 2790 + static void do_lock_mount(const struct path *path, 2791 + struct pinned_mountpoint *res, 2792 + bool beneath) 2793 + { 2794 + int err; 2795 + 2796 + if (unlikely(beneath) && !path_mounted(path)) { 2797 + res->parent = ERR_PTR(-EINVAL); 2798 + return; 2799 + } 2800 + 2801 + do { 2802 + struct dentry *dentry, *d; 2803 + struct mount *m, *n; 2804 + 2805 + scoped_guard(mount_locked_reader) { 2806 + m = where_to_mount(path, &dentry, beneath); 2807 + if (&m->mnt != path->mnt) { 2808 + mntget(&m->mnt); 2809 + dget(dentry); 2810 + } 2788 2811 } 2789 2812 2790 2813 inode_lock(dentry->d_inode); 2791 2814 namespace_lock(); 2792 2815 2793 - if (unlikely(cant_mount(dentry) || !is_mounted(mnt))) 2794 - break; // not to be mounted on 2816 + // check if the chain of mounts (if any) has changed. 2817 + scoped_guard(mount_locked_reader) 2818 + n = where_to_mount(path, &d, beneath); 2795 2819 2796 - if (beneath && unlikely(m->mnt_mountpoint != dentry || 2797 - &m->mnt_parent->mnt != under.mnt)) { 2820 + if (unlikely(n != m || dentry != d)) 2821 + err = -EAGAIN; // something moved, retry 2822 + else if (unlikely(cant_mount(dentry) || !is_mounted(path->mnt))) 2823 + err = -ENOENT; // not to be mounted on 2824 + else if (beneath && &m->mnt == path->mnt && !m->overmount) 2825 + err = -EINVAL; 2826 + else 2827 + err = get_mountpoint(dentry, res); 2828 + 2829 + if (unlikely(err)) { 2830 + res->parent = ERR_PTR(err); 2798 2831 namespace_unlock(); 2799 2832 inode_unlock(dentry->d_inode); 2800 - continue; // got moved 2833 + } else { 2834 + res->parent = m; 2801 2835 } 2802 - 2803 - mnt = lookup_mnt(path); 2804 - if (unlikely(mnt)) { 2805 - namespace_unlock(); 2806 - inode_unlock(dentry->d_inode); 2807 - path_put(path); 2808 - path->mnt = mnt; 2809 - path->dentry = dget(mnt->mnt_root); 2810 - continue; // got overmounted 2836 + /* 2837 + * Drop the temporary references. This is subtle - on success 2838 + * we are doing that under namespace_sem, which would normally 2839 + * be forbidden. However, in that case we are guaranteed that 2840 + * refcounts won't reach zero, since we know that path->mnt 2841 + * is mounted and thus all mounts reachable from it are pinned 2842 + * and stable, along with their mountpoints and roots. 2843 + */ 2844 + if (&m->mnt != path->mnt) { 2845 + dput(dentry); 2846 + mntput(&m->mnt); 2811 2847 } 2812 - err = get_mountpoint(dentry, pinned); 2813 - if (err) 2814 - break; 2815 - if (beneath) { 2816 - /* 2817 - * @under duplicates the references that will stay 2818 - * at least until namespace_unlock(), so the path_put() 2819 - * below is safe (and OK to do under namespace_lock - 2820 - * we are not dropping the final references here). 2821 - */ 2822 - path_put(&under); 2823 - } 2824 - return 0; 2825 - } 2826 - namespace_unlock(); 2827 - inode_unlock(dentry->d_inode); 2828 - if (beneath) 2829 - path_put(&under); 2830 - return err; 2848 + } while (err == -EAGAIN); 2831 2849 } 2832 2850 2833 - static inline int lock_mount(struct path *path, struct pinned_mountpoint *m) 2834 - { 2835 - return do_lock_mount(path, m, false); 2836 - } 2837 - 2838 - static void unlock_mount(struct pinned_mountpoint *m) 2851 + static void __unlock_mount(struct pinned_mountpoint *m) 2839 2852 { 2840 2853 inode_unlock(m->mp->m_dentry->d_inode); 2841 2854 read_seqlock_excl(&mount_lock); ··· 2854 2847 namespace_unlock(); 2855 2848 } 2856 2849 2857 - static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) 2850 + static inline void unlock_mount(struct pinned_mountpoint *m) 2851 + { 2852 + if (!IS_ERR(m->parent)) 2853 + __unlock_mount(m); 2854 + } 2855 + 2856 + #define LOCK_MOUNT_MAYBE_BENEATH(mp, path, beneath) \ 2857 + struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \ 2858 + do_lock_mount((path), &mp, (beneath)) 2859 + #define LOCK_MOUNT(mp, path) LOCK_MOUNT_MAYBE_BENEATH(mp, (path), false) 2860 + #define LOCK_MOUNT_EXACT(mp, path) \ 2861 + struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \ 2862 + lock_mount_exact((path), &mp) 2863 + 2864 + static int graft_tree(struct mount *mnt, const struct pinned_mountpoint *mp) 2858 2865 { 2859 2866 if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER) 2860 2867 return -EINVAL; 2861 2868 2862 - if (d_is_dir(mp->m_dentry) != 2869 + if (d_is_dir(mp->mp->m_dentry) != 2863 2870 d_is_dir(mnt->mnt.mnt_root)) 2864 2871 return -ENOTDIR; 2865 2872 2866 - return attach_recursive_mnt(mnt, p, mp); 2873 + return attach_recursive_mnt(mnt, mp); 2867 2874 } 2868 2875 2869 2876 static int may_change_propagation(const struct mount *m) ··· 2913 2892 /* 2914 2893 * recursively change the type of the mountpoint. 2915 2894 */ 2916 - static int do_change_type(struct path *path, int ms_flags) 2895 + static int do_change_type(const struct path *path, int ms_flags) 2917 2896 { 2918 2897 struct mount *m; 2919 2898 struct mount *mnt = real_mount(path->mnt); 2920 2899 int recurse = ms_flags & MS_REC; 2921 2900 int type; 2922 - int err = 0; 2901 + int err; 2923 2902 2924 2903 if (!path_mounted(path)) 2925 2904 return -EINVAL; ··· 2928 2907 if (!type) 2929 2908 return -EINVAL; 2930 2909 2931 - namespace_lock(); 2910 + guard(namespace_excl)(); 2911 + 2932 2912 err = may_change_propagation(mnt); 2933 2913 if (err) 2934 - goto out_unlock; 2914 + return err; 2935 2915 2936 2916 if (type == MS_SHARED) { 2937 2917 err = invent_group_ids(mnt, recurse); 2938 2918 if (err) 2939 - goto out_unlock; 2919 + return err; 2940 2920 } 2941 2921 2942 2922 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) 2943 2923 change_mnt_propagation(m, type); 2944 2924 2945 - out_unlock: 2946 - namespace_unlock(); 2947 - return err; 2925 + return 0; 2948 2926 } 2949 2927 2950 2928 /* may_copy_tree() - check if a mount tree can be copied ··· 2989 2969 * 2990 2970 * Returns true if the mount tree can be copied, false otherwise. 2991 2971 */ 2992 - static inline bool may_copy_tree(struct path *path) 2972 + static inline bool may_copy_tree(const struct path *path) 2993 2973 { 2994 2974 struct mount *mnt = real_mount(path->mnt); 2995 2975 const struct dentry_operations *d_op; ··· 3011 2991 } 3012 2992 3013 2993 3014 - static struct mount *__do_loopback(struct path *old_path, int recurse) 2994 + static struct mount *__do_loopback(const struct path *old_path, int recurse) 3015 2995 { 3016 2996 struct mount *old = real_mount(old_path->mnt); 3017 2997 ··· 3033 3013 /* 3034 3014 * do loopback mount. 3035 3015 */ 3036 - static int do_loopback(struct path *path, const char *old_name, 3037 - int recurse) 3016 + static int do_loopback(const struct path *path, const char *old_name, 3017 + int recurse) 3038 3018 { 3039 - struct path old_path; 3040 - struct mount *mnt = NULL, *parent; 3041 - struct pinned_mountpoint mp = {}; 3019 + struct path old_path __free(path_put) = {}; 3020 + struct mount *mnt = NULL; 3042 3021 int err; 3043 3022 if (!old_name || !*old_name) 3044 3023 return -EINVAL; ··· 3045 3026 if (err) 3046 3027 return err; 3047 3028 3048 - err = -EINVAL; 3049 3029 if (mnt_ns_loop(old_path.dentry)) 3050 - goto out; 3030 + return -EINVAL; 3051 3031 3052 - err = lock_mount(path, &mp); 3053 - if (err) 3054 - goto out; 3032 + LOCK_MOUNT(mp, path); 3033 + if (IS_ERR(mp.parent)) 3034 + return PTR_ERR(mp.parent); 3055 3035 3056 - parent = real_mount(path->mnt); 3057 - if (!check_mnt(parent)) 3058 - goto out2; 3036 + if (!check_mnt(mp.parent)) 3037 + return -EINVAL; 3059 3038 3060 3039 mnt = __do_loopback(&old_path, recurse); 3061 - if (IS_ERR(mnt)) { 3062 - err = PTR_ERR(mnt); 3063 - goto out2; 3064 - } 3040 + if (IS_ERR(mnt)) 3041 + return PTR_ERR(mnt); 3065 3042 3066 - err = graft_tree(mnt, parent, mp.mp); 3043 + err = graft_tree(mnt, &mp); 3067 3044 if (err) { 3068 3045 lock_mount_hash(); 3069 3046 umount_tree(mnt, UMOUNT_SYNC); 3070 3047 unlock_mount_hash(); 3071 3048 } 3072 - out2: 3073 - unlock_mount(&mp); 3074 - out: 3075 - path_put(&old_path); 3076 3049 return err; 3077 3050 } 3078 3051 3079 - static struct file *open_detached_copy(struct path *path, bool recursive) 3052 + static struct mnt_namespace *get_detached_copy(const struct path *path, bool recursive) 3080 3053 { 3081 3054 struct mnt_namespace *ns, *mnt_ns = current->nsproxy->mnt_ns, *src_mnt_ns; 3082 3055 struct user_namespace *user_ns = mnt_ns->user_ns; 3083 3056 struct mount *mnt, *p; 3084 - struct file *file; 3085 3057 3086 3058 ns = alloc_mnt_ns(user_ns, true); 3087 3059 if (IS_ERR(ns)) 3088 - return ERR_CAST(ns); 3060 + return ns; 3089 3061 3090 - namespace_lock(); 3062 + guard(namespace_excl)(); 3091 3063 3092 3064 /* 3093 3065 * Record the sequence number of the source mount namespace. ··· 3095 3085 3096 3086 mnt = __do_loopback(path, recursive); 3097 3087 if (IS_ERR(mnt)) { 3098 - namespace_unlock(); 3099 - free_mnt_ns(ns); 3088 + emptied_ns = ns; 3100 3089 return ERR_CAST(mnt); 3101 3090 } 3102 3091 3103 - lock_mount_hash(); 3104 3092 for (p = mnt; p; p = next_mnt(p, mnt)) { 3105 3093 mnt_add_to_ns(ns, p); 3106 3094 ns->nr_mounts++; 3107 3095 } 3108 3096 ns->root = mnt; 3109 - mntget(&mnt->mnt); 3110 - unlock_mount_hash(); 3111 - namespace_unlock(); 3097 + return ns; 3098 + } 3099 + 3100 + static struct file *open_detached_copy(struct path *path, bool recursive) 3101 + { 3102 + struct mnt_namespace *ns = get_detached_copy(path, recursive); 3103 + struct file *file; 3104 + 3105 + if (IS_ERR(ns)) 3106 + return ERR_CAST(ns); 3112 3107 3113 3108 mntput(path->mnt); 3114 - path->mnt = &mnt->mnt; 3109 + path->mnt = mntget(&ns->root->mnt); 3115 3110 file = dentry_open(path, O_PATH, current_cred()); 3116 3111 if (IS_ERR(file)) 3117 3112 dissolve_on_fput(path->mnt); ··· 3233 3218 touch_mnt_namespace(mnt->mnt_ns); 3234 3219 } 3235 3220 3236 - static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt) 3221 + static void mnt_warn_timestamp_expiry(const struct path *mountpoint, 3222 + struct vfsmount *mnt) 3237 3223 { 3238 3224 struct super_block *sb = mnt->mnt_sb; 3239 3225 ··· 3268 3252 * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND 3269 3253 * to mount(2). 3270 3254 */ 3271 - static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags) 3255 + static int do_reconfigure_mnt(const struct path *path, unsigned int mnt_flags) 3272 3256 { 3273 3257 struct super_block *sb = path->mnt->mnt_sb; 3274 3258 struct mount *mnt = real_mount(path->mnt); ··· 3305 3289 * If you've mounted a non-root directory somewhere and want to do remount 3306 3290 * on it - tough luck. 3307 3291 */ 3308 - static int do_remount(struct path *path, int ms_flags, int sb_flags, 3292 + static int do_remount(const struct path *path, int ms_flags, int sb_flags, 3309 3293 int mnt_flags, void *data) 3310 3294 { 3311 3295 int err; ··· 3363 3347 return 0; 3364 3348 } 3365 3349 3366 - static int do_set_group(struct path *from_path, struct path *to_path) 3350 + static int do_set_group(const struct path *from_path, const struct path *to_path) 3367 3351 { 3368 - struct mount *from, *to; 3352 + struct mount *from = real_mount(from_path->mnt); 3353 + struct mount *to = real_mount(to_path->mnt); 3369 3354 int err; 3370 3355 3371 - from = real_mount(from_path->mnt); 3372 - to = real_mount(to_path->mnt); 3373 - 3374 - namespace_lock(); 3356 + guard(namespace_excl)(); 3375 3357 3376 3358 err = may_change_propagation(from); 3377 3359 if (err) 3378 - goto out; 3360 + return err; 3379 3361 err = may_change_propagation(to); 3380 3362 if (err) 3381 - goto out; 3363 + return err; 3382 3364 3383 - err = -EINVAL; 3384 3365 /* To and From paths should be mount roots */ 3385 3366 if (!path_mounted(from_path)) 3386 - goto out; 3367 + return -EINVAL; 3387 3368 if (!path_mounted(to_path)) 3388 - goto out; 3369 + return -EINVAL; 3389 3370 3390 3371 /* Setting sharing groups is only allowed across same superblock */ 3391 3372 if (from->mnt.mnt_sb != to->mnt.mnt_sb) 3392 - goto out; 3373 + return -EINVAL; 3393 3374 3394 3375 /* From mount root should be wider than To mount root */ 3395 3376 if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root)) 3396 - goto out; 3377 + return -EINVAL; 3397 3378 3398 3379 /* From mount should not have locked children in place of To's root */ 3399 3380 if (__has_locked_children(from, to->mnt.mnt_root)) 3400 - goto out; 3381 + return -EINVAL; 3401 3382 3402 3383 /* Setting sharing groups is only allowed on private mounts */ 3403 3384 if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to)) 3404 - goto out; 3385 + return -EINVAL; 3405 3386 3406 3387 /* From should not be private */ 3407 3388 if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from)) 3408 - goto out; 3389 + return -EINVAL; 3409 3390 3410 3391 if (IS_MNT_SLAVE(from)) { 3411 3392 hlist_add_behind(&to->mnt_slave, &from->mnt_slave); ··· 3414 3401 list_add(&to->mnt_share, &from->mnt_share); 3415 3402 set_mnt_shared(to); 3416 3403 } 3417 - 3418 - err = 0; 3419 - out: 3420 - namespace_unlock(); 3421 - return err; 3404 + return 0; 3422 3405 } 3423 3406 3424 3407 /** ··· 3458 3449 3459 3450 /** 3460 3451 * can_move_mount_beneath - check that we can mount beneath the top mount 3461 - * @from: mount to mount beneath 3462 - * @to: mount under which to mount 3463 - * @mp: mountpoint of @to 3452 + * @mnt_from: mount we are trying to move 3453 + * @mnt_to: mount under which to mount 3454 + * @mp: mountpoint of @mnt_to 3464 3455 * 3465 - * - Make sure that @to->dentry is actually the root of a mount under 3466 - * which we can mount another mount. 3467 3456 * - Make sure that nothing can be mounted beneath the caller's current 3468 3457 * root or the rootfs of the namespace. 3469 3458 * - Make sure that the caller can unmount the topmost mount ensuring 3470 3459 * that the caller could reveal the underlying mountpoint. 3471 - * - Ensure that nothing has been mounted on top of @from before we 3460 + * - Ensure that nothing has been mounted on top of @mnt_from before we 3472 3461 * grabbed @namespace_sem to avoid creating pointless shadow mounts. 3473 3462 * - Prevent mounting beneath a mount if the propagation relationship 3474 3463 * between the source mount, parent mount, and top mount would lead to ··· 3475 3468 * Context: This function expects namespace_lock() to be held. 3476 3469 * Return: On success 0, and on error a negative error code is returned. 3477 3470 */ 3478 - static int can_move_mount_beneath(const struct path *from, 3479 - const struct path *to, 3471 + static int can_move_mount_beneath(const struct mount *mnt_from, 3472 + const struct mount *mnt_to, 3480 3473 const struct mountpoint *mp) 3481 3474 { 3482 - struct mount *mnt_from = real_mount(from->mnt), 3483 - *mnt_to = real_mount(to->mnt), 3484 - *parent_mnt_to = mnt_to->mnt_parent; 3485 - 3486 - if (!mnt_has_parent(mnt_to)) 3487 - return -EINVAL; 3488 - 3489 - if (!path_mounted(to)) 3490 - return -EINVAL; 3475 + struct mount *parent_mnt_to = mnt_to->mnt_parent; 3491 3476 3492 3477 if (IS_MNT_LOCKED(mnt_to)) 3493 3478 return -EINVAL; 3494 3479 3495 3480 /* Avoid creating shadow mounts during mount propagation. */ 3496 - if (path_overmounted(from)) 3481 + if (mnt_from->overmount) 3497 3482 return -EINVAL; 3498 3483 3499 3484 /* ··· 3576 3577 return check_anonymous_mnt(mnt); 3577 3578 } 3578 3579 3579 - static int do_move_mount(struct path *old_path, 3580 - struct path *new_path, enum mnt_tree_flags_t flags) 3580 + static int do_move_mount(const struct path *old_path, 3581 + const struct path *new_path, 3582 + enum mnt_tree_flags_t flags) 3581 3583 { 3582 - struct mnt_namespace *ns; 3583 - struct mount *p; 3584 - struct mount *old; 3585 - struct mount *parent; 3586 - struct pinned_mountpoint mp; 3584 + struct mount *old = real_mount(old_path->mnt); 3587 3585 int err; 3588 3586 bool beneath = flags & MNT_TREE_BENEATH; 3589 3587 3590 - err = do_lock_mount(new_path, &mp, beneath); 3591 - if (err) 3592 - return err; 3588 + if (!path_mounted(old_path)) 3589 + return -EINVAL; 3593 3590 3594 - old = real_mount(old_path->mnt); 3595 - p = real_mount(new_path->mnt); 3596 - parent = old->mnt_parent; 3597 - ns = old->mnt_ns; 3591 + if (d_is_dir(new_path->dentry) != d_is_dir(old_path->dentry)) 3592 + return -EINVAL; 3598 3593 3599 - err = -EINVAL; 3594 + LOCK_MOUNT_MAYBE_BENEATH(mp, new_path, beneath); 3595 + if (IS_ERR(mp.parent)) 3596 + return PTR_ERR(mp.parent); 3600 3597 3601 3598 if (check_mnt(old)) { 3602 3599 /* if the source is in our namespace... */ 3603 3600 /* ... it should be detachable from parent */ 3604 3601 if (!mnt_has_parent(old) || IS_MNT_LOCKED(old)) 3605 - goto out; 3602 + return -EINVAL; 3603 + /* ... which should not be shared */ 3604 + if (IS_MNT_SHARED(old->mnt_parent)) 3605 + return -EINVAL; 3606 3606 /* ... and the target should be in our namespace */ 3607 - if (!check_mnt(p)) 3608 - goto out; 3609 - /* parent of the source should not be shared */ 3610 - if (IS_MNT_SHARED(parent)) 3611 - goto out; 3607 + if (!check_mnt(mp.parent)) 3608 + return -EINVAL; 3612 3609 } else { 3613 3610 /* 3614 3611 * otherwise the source must be the root of some anon namespace. 3615 3612 */ 3616 3613 if (!anon_ns_root(old)) 3617 - goto out; 3614 + return -EINVAL; 3618 3615 /* 3619 3616 * Bail out early if the target is within the same namespace - 3620 3617 * subsequent checks would've rejected that, but they lose 3621 3618 * some corner cases if we check it early. 3622 3619 */ 3623 - if (ns == p->mnt_ns) 3624 - goto out; 3620 + if (old->mnt_ns == mp.parent->mnt_ns) 3621 + return -EINVAL; 3625 3622 /* 3626 3623 * Target should be either in our namespace or in an acceptable 3627 3624 * anon namespace, sensu check_anonymous_mnt(). 3628 3625 */ 3629 - if (!may_use_mount(p)) 3630 - goto out; 3626 + if (!may_use_mount(mp.parent)) 3627 + return -EINVAL; 3631 3628 } 3632 3629 3633 - if (!path_mounted(old_path)) 3634 - goto out; 3635 - 3636 - if (d_is_dir(new_path->dentry) != 3637 - d_is_dir(old_path->dentry)) 3638 - goto out; 3639 - 3640 3630 if (beneath) { 3641 - err = can_move_mount_beneath(old_path, new_path, mp.mp); 3642 - if (err) 3643 - goto out; 3631 + struct mount *over = real_mount(new_path->mnt); 3644 3632 3645 - err = -EINVAL; 3646 - p = p->mnt_parent; 3633 + if (mp.parent != over->mnt_parent) 3634 + over = mp.parent->overmount; 3635 + err = can_move_mount_beneath(old, over, mp.mp); 3636 + if (err) 3637 + return err; 3647 3638 } 3648 3639 3649 3640 /* 3650 3641 * Don't move a mount tree containing unbindable mounts to a destination 3651 3642 * mount which is shared. 3652 3643 */ 3653 - if (IS_MNT_SHARED(p) && tree_contains_unbindable(old)) 3654 - goto out; 3655 - err = -ELOOP; 3644 + if (IS_MNT_SHARED(mp.parent) && tree_contains_unbindable(old)) 3645 + return -EINVAL; 3656 3646 if (!check_for_nsfs_mounts(old)) 3657 - goto out; 3658 - if (mount_is_ancestor(old, p)) 3659 - goto out; 3647 + return -ELOOP; 3648 + if (mount_is_ancestor(old, mp.parent)) 3649 + return -ELOOP; 3660 3650 3661 - err = attach_recursive_mnt(old, p, mp.mp); 3662 - out: 3663 - unlock_mount(&mp); 3664 - return err; 3651 + return attach_recursive_mnt(old, &mp); 3665 3652 } 3666 3653 3667 - static int do_move_mount_old(struct path *path, const char *old_name) 3654 + static int do_move_mount_old(const struct path *path, const char *old_name) 3668 3655 { 3669 - struct path old_path; 3656 + struct path old_path __free(path_put) = {}; 3670 3657 int err; 3671 3658 3672 3659 if (!old_name || !*old_name) ··· 3662 3677 if (err) 3663 3678 return err; 3664 3679 3665 - err = do_move_mount(&old_path, path, 0); 3666 - path_put(&old_path); 3667 - return err; 3680 + return do_move_mount(&old_path, path, 0); 3668 3681 } 3669 3682 3670 3683 /* 3671 3684 * add a mount into a namespace's mount tree 3672 3685 */ 3673 - static int do_add_mount(struct mount *newmnt, struct mountpoint *mp, 3674 - const struct path *path, int mnt_flags) 3686 + static int do_add_mount(struct mount *newmnt, const struct pinned_mountpoint *mp, 3687 + int mnt_flags) 3675 3688 { 3676 - struct mount *parent = real_mount(path->mnt); 3689 + struct mount *parent = mp->parent; 3690 + 3691 + if (IS_ERR(parent)) 3692 + return PTR_ERR(parent); 3677 3693 3678 3694 mnt_flags &= ~MNT_INTERNAL_FLAGS; 3679 3695 ··· 3688 3702 } 3689 3703 3690 3704 /* Refuse the same filesystem on the same mount point */ 3691 - if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && path_mounted(path)) 3705 + if (parent->mnt.mnt_sb == newmnt->mnt.mnt_sb && 3706 + parent->mnt.mnt_root == mp->mp->m_dentry) 3692 3707 return -EBUSY; 3693 3708 3694 3709 if (d_is_symlink(newmnt->mnt.mnt_root)) 3695 3710 return -EINVAL; 3696 3711 3697 3712 newmnt->mnt.mnt_flags = mnt_flags; 3698 - return graft_tree(newmnt, parent, mp); 3713 + return graft_tree(newmnt, mp); 3699 3714 } 3700 3715 3701 3716 static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags); ··· 3705 3718 * Create a new mount using a superblock configuration and request it 3706 3719 * be added to the namespace tree. 3707 3720 */ 3708 - static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, 3721 + static int do_new_mount_fc(struct fs_context *fc, const struct path *mountpoint, 3709 3722 unsigned int mnt_flags) 3710 3723 { 3711 - struct vfsmount *mnt; 3712 - struct pinned_mountpoint mp = {}; 3713 - struct super_block *sb = fc->root->d_sb; 3724 + struct super_block *sb; 3725 + struct vfsmount *mnt __free(mntput) = fc_mount(fc); 3714 3726 int error; 3715 3727 3716 - error = security_sb_kern_mount(sb); 3717 - if (!error && mount_too_revealing(sb, &mnt_flags)) 3718 - error = -EPERM; 3719 - 3720 - if (unlikely(error)) { 3721 - fc_drop_locked(fc); 3722 - return error; 3723 - } 3724 - 3725 - up_write(&sb->s_umount); 3726 - 3727 - mnt = vfs_create_mount(fc); 3728 3728 if (IS_ERR(mnt)) 3729 3729 return PTR_ERR(mnt); 3730 3730 3731 + sb = fc->root->d_sb; 3732 + error = security_sb_kern_mount(sb); 3733 + if (unlikely(error)) 3734 + return error; 3735 + 3736 + if (unlikely(mount_too_revealing(sb, &mnt_flags))) 3737 + return -EPERM; 3738 + 3731 3739 mnt_warn_timestamp_expiry(mountpoint, mnt); 3732 3740 3733 - error = lock_mount(mountpoint, &mp); 3734 - if (!error) { 3735 - error = do_add_mount(real_mount(mnt), mp.mp, 3736 - mountpoint, mnt_flags); 3737 - unlock_mount(&mp); 3738 - } 3739 - if (error < 0) 3740 - mntput(mnt); 3741 + LOCK_MOUNT(mp, mountpoint); 3742 + error = do_add_mount(real_mount(mnt), &mp, mnt_flags); 3743 + if (!error) 3744 + retain_and_null_ptr(mnt); // consumed on success 3741 3745 return error; 3742 3746 } 3743 3747 ··· 3736 3758 * create a new mount for userspace and request it to be added into the 3737 3759 * namespace's tree 3738 3760 */ 3739 - static int do_new_mount(struct path *path, const char *fstype, int sb_flags, 3740 - int mnt_flags, const char *name, void *data) 3761 + static int do_new_mount(const struct path *path, const char *fstype, 3762 + int sb_flags, int mnt_flags, 3763 + const char *name, void *data) 3741 3764 { 3742 3765 struct file_system_type *type; 3743 3766 struct fs_context *fc; ··· 3784 3805 if (!err && !mount_capable(fc)) 3785 3806 err = -EPERM; 3786 3807 if (!err) 3787 - err = vfs_get_tree(fc); 3788 - if (!err) 3789 3808 err = do_new_mount_fc(fc, path, mnt_flags); 3790 3809 3791 3810 put_fs_context(fc); 3792 3811 return err; 3793 3812 } 3794 3813 3795 - int finish_automount(struct vfsmount *m, const struct path *path) 3814 + static void lock_mount_exact(const struct path *path, 3815 + struct pinned_mountpoint *mp) 3796 3816 { 3797 3817 struct dentry *dentry = path->dentry; 3798 - struct pinned_mountpoint mp = {}; 3818 + int err; 3819 + 3820 + inode_lock(dentry->d_inode); 3821 + namespace_lock(); 3822 + if (unlikely(cant_mount(dentry))) 3823 + err = -ENOENT; 3824 + else if (path_overmounted(path)) 3825 + err = -EBUSY; 3826 + else 3827 + err = get_mountpoint(dentry, mp); 3828 + if (unlikely(err)) { 3829 + namespace_unlock(); 3830 + inode_unlock(dentry->d_inode); 3831 + mp->parent = ERR_PTR(err); 3832 + } else { 3833 + mp->parent = real_mount(path->mnt); 3834 + } 3835 + } 3836 + 3837 + int finish_automount(struct vfsmount *__m, const struct path *path) 3838 + { 3839 + struct vfsmount *m __free(mntput) = __m; 3799 3840 struct mount *mnt; 3800 3841 int err; 3801 3842 ··· 3826 3827 3827 3828 mnt = real_mount(m); 3828 3829 3829 - if (m->mnt_sb == path->mnt->mnt_sb && 3830 - m->mnt_root == dentry) { 3831 - err = -ELOOP; 3832 - goto discard; 3833 - } 3830 + if (m->mnt_root == path->dentry) 3831 + return -ELOOP; 3834 3832 3835 3833 /* 3836 - * we don't want to use lock_mount() - in this case finding something 3834 + * we don't want to use LOCK_MOUNT() - in this case finding something 3837 3835 * that overmounts our mountpoint to be means "quitely drop what we've 3838 3836 * got", not "try to mount it on top". 3839 3837 */ 3840 - inode_lock(dentry->d_inode); 3841 - namespace_lock(); 3842 - if (unlikely(cant_mount(dentry))) { 3843 - err = -ENOENT; 3844 - goto discard_locked; 3845 - } 3846 - if (path_overmounted(path)) { 3847 - err = 0; 3848 - goto discard_locked; 3849 - } 3850 - err = get_mountpoint(dentry, &mp); 3851 - if (err) 3852 - goto discard_locked; 3838 + LOCK_MOUNT_EXACT(mp, path); 3839 + if (mp.parent == ERR_PTR(-EBUSY)) 3840 + return 0; 3853 3841 3854 - err = do_add_mount(mnt, mp.mp, path, 3855 - path->mnt->mnt_flags | MNT_SHRINKABLE); 3856 - unlock_mount(&mp); 3857 - if (unlikely(err)) 3858 - goto discard; 3859 - return 0; 3860 - 3861 - discard_locked: 3862 - namespace_unlock(); 3863 - inode_unlock(dentry->d_inode); 3864 - discard: 3865 - mntput(m); 3842 + err = do_add_mount(mnt, &mp, path->mnt->mnt_flags | MNT_SHRINKABLE); 3843 + if (likely(!err)) 3844 + retain_and_null_ptr(m); 3866 3845 return err; 3867 3846 } 3868 3847 ··· 3851 3874 */ 3852 3875 void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) 3853 3876 { 3854 - read_seqlock_excl(&mount_lock); 3877 + guard(mount_locked_reader)(); 3855 3878 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); 3856 - read_sequnlock_excl(&mount_lock); 3857 3879 } 3858 3880 EXPORT_SYMBOL(mnt_set_expiry); 3859 3881 ··· 3869 3893 if (list_empty(mounts)) 3870 3894 return; 3871 3895 3872 - namespace_lock(); 3873 - lock_mount_hash(); 3896 + guard(namespace_excl)(); 3897 + guard(mount_writer)(); 3874 3898 3875 3899 /* extract from the expiration list every vfsmount that matches the 3876 3900 * following criteria: ··· 3892 3916 touch_mnt_namespace(mnt->mnt_ns); 3893 3917 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); 3894 3918 } 3895 - unlock_mount_hash(); 3896 - namespace_unlock(); 3897 3919 } 3898 3920 3899 3921 EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); ··· 4019 4045 * Therefore, if this magic number is present, it carries no information 4020 4046 * and must be discarded. 4021 4047 */ 4022 - int path_mount(const char *dev_name, struct path *path, 4048 + int path_mount(const char *dev_name, const struct path *path, 4023 4049 const char *type_page, unsigned long flags, void *data_page) 4024 4050 { 4025 4051 unsigned int mnt_flags = 0, sb_flags; ··· 4101 4127 int do_mount(const char *dev_name, const char __user *dir_name, 4102 4128 const char *type_page, unsigned long flags, void *data_page) 4103 4129 { 4104 - struct path path; 4130 + struct path path __free(path_put) = {}; 4105 4131 int ret; 4106 4132 4107 4133 ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path); 4108 4134 if (ret) 4109 4135 return ret; 4110 - ret = path_mount(dev_name, &path, type_page, flags, data_page); 4111 - path_put(&path); 4112 - return ret; 4136 + return path_mount(dev_name, &path, type_page, flags, data_page); 4113 4137 } 4114 4138 4115 4139 static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) ··· 4179 4207 struct user_namespace *user_ns, struct fs_struct *new_fs) 4180 4208 { 4181 4209 struct mnt_namespace *new_ns; 4182 - struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 4210 + struct vfsmount *rootmnt __free(mntput) = NULL; 4211 + struct vfsmount *pwdmnt __free(mntput) = NULL; 4183 4212 struct mount *p, *q; 4184 4213 struct mount *old; 4185 4214 struct mount *new; ··· 4199 4226 if (IS_ERR(new_ns)) 4200 4227 return new_ns; 4201 4228 4202 - namespace_lock(); 4229 + guard(namespace_excl)(); 4203 4230 /* First pass: copy the tree topology */ 4204 4231 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; 4205 4232 if (user_ns != ns->user_ns) 4206 4233 copy_flags |= CL_SLAVE; 4207 4234 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 4208 4235 if (IS_ERR(new)) { 4209 - namespace_unlock(); 4210 - ns_free_inum(&new_ns->ns); 4211 - dec_mnt_namespaces(new_ns->ucounts); 4212 - mnt_ns_release(new_ns); 4236 + emptied_ns = new_ns; 4213 4237 return ERR_CAST(new); 4214 4238 } 4215 4239 if (user_ns != ns->user_ns) { 4216 - lock_mount_hash(); 4240 + guard(mount_writer)(); 4217 4241 lock_mnt_tree(new); 4218 - unlock_mount_hash(); 4219 4242 } 4220 4243 new_ns->root = new; 4221 4244 ··· 4243 4274 while (p->mnt.mnt_root != q->mnt.mnt_root) 4244 4275 p = next_mnt(skip_mnt_tree(p), old); 4245 4276 } 4246 - namespace_unlock(); 4247 - 4248 - if (rootmnt) 4249 - mntput(rootmnt); 4250 - if (pwdmnt) 4251 - mntput(pwdmnt); 4252 - 4253 4277 mnt_ns_tree_add(new_ns); 4254 4278 return new_ns; 4255 4279 } ··· 4467 4505 return ret; 4468 4506 } 4469 4507 4470 - static inline int vfs_move_mount(struct path *from_path, struct path *to_path, 4508 + static inline int vfs_move_mount(const struct path *from_path, 4509 + const struct path *to_path, 4471 4510 enum mnt_tree_flags_t mflags) 4472 4511 { 4473 4512 int ret; ··· 4574 4611 /* 4575 4612 * Return true if path is reachable from root 4576 4613 * 4577 - * namespace_sem or mount_lock is held 4614 + * locks: mount_locked_reader || namespace_shared && is_mounted(mnt) 4578 4615 */ 4579 4616 bool is_path_reachable(struct mount *mnt, struct dentry *dentry, 4580 4617 const struct path *root) ··· 4588 4625 4589 4626 bool path_is_under(const struct path *path1, const struct path *path2) 4590 4627 { 4591 - bool res; 4592 - read_seqlock_excl(&mount_lock); 4593 - res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); 4594 - read_sequnlock_excl(&mount_lock); 4595 - return res; 4628 + guard(mount_locked_reader)(); 4629 + return is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); 4596 4630 } 4597 4631 EXPORT_SYMBOL(path_is_under); 4598 4632 ··· 4621 4661 SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, 4622 4662 const char __user *, put_old) 4623 4663 { 4624 - struct path new, old, root; 4664 + struct path new __free(path_put) = {}; 4665 + struct path old __free(path_put) = {}; 4666 + struct path root __free(path_put) = {}; 4625 4667 struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent; 4626 - struct pinned_mountpoint old_mp = {}; 4627 4668 int error; 4628 4669 4629 4670 if (!may_mount()) ··· 4633 4672 error = user_path_at(AT_FDCWD, new_root, 4634 4673 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new); 4635 4674 if (error) 4636 - goto out0; 4675 + return error; 4637 4676 4638 4677 error = user_path_at(AT_FDCWD, put_old, 4639 4678 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old); 4640 4679 if (error) 4641 - goto out1; 4680 + return error; 4642 4681 4643 4682 error = security_sb_pivotroot(&old, &new); 4644 4683 if (error) 4645 - goto out2; 4684 + return error; 4646 4685 4647 4686 get_fs_root(current->fs, &root); 4648 - error = lock_mount(&old, &old_mp); 4649 - if (error) 4650 - goto out3; 4651 4687 4652 - error = -EINVAL; 4688 + LOCK_MOUNT(old_mp, &old); 4689 + old_mnt = old_mp.parent; 4690 + if (IS_ERR(old_mnt)) 4691 + return PTR_ERR(old_mnt); 4692 + 4653 4693 new_mnt = real_mount(new.mnt); 4654 4694 root_mnt = real_mount(root.mnt); 4655 - old_mnt = real_mount(old.mnt); 4656 4695 ex_parent = new_mnt->mnt_parent; 4657 4696 root_parent = root_mnt->mnt_parent; 4658 4697 if (IS_MNT_SHARED(old_mnt) || 4659 4698 IS_MNT_SHARED(ex_parent) || 4660 4699 IS_MNT_SHARED(root_parent)) 4661 - goto out4; 4700 + return -EINVAL; 4662 4701 if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) 4663 - goto out4; 4702 + return -EINVAL; 4664 4703 if (new_mnt->mnt.mnt_flags & MNT_LOCKED) 4665 - goto out4; 4666 - error = -ENOENT; 4704 + return -EINVAL; 4667 4705 if (d_unlinked(new.dentry)) 4668 - goto out4; 4669 - error = -EBUSY; 4706 + return -ENOENT; 4670 4707 if (new_mnt == root_mnt || old_mnt == root_mnt) 4671 - goto out4; /* loop, on the same file system */ 4672 - error = -EINVAL; 4708 + return -EBUSY; /* loop, on the same file system */ 4673 4709 if (!path_mounted(&root)) 4674 - goto out4; /* not a mountpoint */ 4710 + return -EINVAL; /* not a mountpoint */ 4675 4711 if (!mnt_has_parent(root_mnt)) 4676 - goto out4; /* absolute root */ 4712 + return -EINVAL; /* absolute root */ 4677 4713 if (!path_mounted(&new)) 4678 - goto out4; /* not a mountpoint */ 4714 + return -EINVAL; /* not a mountpoint */ 4679 4715 if (!mnt_has_parent(new_mnt)) 4680 - goto out4; /* absolute root */ 4716 + return -EINVAL; /* absolute root */ 4681 4717 /* make sure we can reach put_old from new_root */ 4682 - if (!is_path_reachable(old_mnt, old.dentry, &new)) 4683 - goto out4; 4718 + if (!is_path_reachable(old_mnt, old_mp.mp->m_dentry, &new)) 4719 + return -EINVAL; 4684 4720 /* make certain new is below the root */ 4685 4721 if (!is_path_reachable(new_mnt, new.dentry, &root)) 4686 - goto out4; 4722 + return -EINVAL; 4687 4723 lock_mount_hash(); 4688 4724 umount_mnt(new_mnt); 4689 4725 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { ··· 4699 4741 mnt_notify_add(root_mnt); 4700 4742 mnt_notify_add(new_mnt); 4701 4743 chroot_fs_refs(&root, &new); 4702 - error = 0; 4703 - out4: 4704 - unlock_mount(&old_mp); 4705 - out3: 4706 - path_put(&root); 4707 - out2: 4708 - path_put(&old); 4709 - out1: 4710 - path_put(&new); 4711 - out0: 4712 - return error; 4744 + return 0; 4713 4745 } 4714 4746 4715 4747 static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt) ··· 4789 4841 4790 4842 if (!mnt_allow_writers(kattr, m)) { 4791 4843 err = mnt_hold_writers(m); 4792 - if (err) 4844 + if (err) { 4845 + m = next_mnt(m, mnt); 4793 4846 break; 4847 + } 4794 4848 } 4795 4849 4796 4850 if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) ··· 4800 4850 } 4801 4851 4802 4852 if (err) { 4803 - struct mount *p; 4804 - 4805 - /* 4806 - * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will 4807 - * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all 4808 - * mounts and needs to take care to include the first mount. 4809 - */ 4810 - for (p = mnt; p; p = next_mnt(p, mnt)) { 4811 - /* If we had to hold writers unblock them. */ 4812 - if (p->mnt.mnt_flags & MNT_WRITE_HOLD) 4813 - mnt_unhold_writers(p); 4814 - 4815 - /* 4816 - * We're done once the first mount we changed got 4817 - * MNT_WRITE_HOLD unset. 4818 - */ 4819 - if (p == m) 4820 - break; 4821 - } 4853 + /* undo all mnt_hold_writers() we'd done */ 4854 + for (struct mount *p = mnt; p != m; p = next_mnt(p, mnt)) 4855 + mnt_unhold_writers(p); 4822 4856 } 4823 4857 return err; 4824 4858 } ··· 4833 4899 WRITE_ONCE(m->mnt.mnt_flags, flags); 4834 4900 4835 4901 /* If we had to hold writers unblock them. */ 4836 - if (m->mnt.mnt_flags & MNT_WRITE_HOLD) 4837 - mnt_unhold_writers(m); 4902 + mnt_unhold_writers(m); 4838 4903 4839 4904 if (kattr->propagation) 4840 4905 change_mnt_propagation(m, kattr->propagation); ··· 4843 4910 touch_mnt_namespace(mnt->mnt_ns); 4844 4911 } 4845 4912 4846 - static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) 4913 + static int do_mount_setattr(const struct path *path, struct mount_kattr *kattr) 4847 4914 { 4848 4915 struct mount *mnt = real_mount(path->mnt); 4849 4916 int err = 0; ··· 5641 5708 STATMOUNT_MNT_UIDMAP | \ 5642 5709 STATMOUNT_MNT_GIDMAP) 5643 5710 5711 + /* locks: namespace_shared */ 5644 5712 static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, 5645 5713 struct mnt_namespace *ns) 5646 5714 { ··· 5891 5957 if (ret) 5892 5958 return ret; 5893 5959 5894 - scoped_guard(rwsem_read, &namespace_sem) 5960 + scoped_guard(namespace_shared) 5895 5961 ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns); 5896 5962 5897 5963 if (!ret) ··· 5902 5968 return ret; 5903 5969 } 5904 5970 5971 + /* locks: namespace_shared */ 5905 5972 static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id, 5906 5973 u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids, 5907 5974 bool reverse) ··· 6014 6079 * We only need to guard against mount topology changes as 6015 6080 * listmount() doesn't care about any mount properties. 6016 6081 */ 6017 - scoped_guard(rwsem_read, &namespace_sem) 6082 + scoped_guard(namespace_shared) 6018 6083 ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids, 6019 6084 nr_mnt_ids, (flags & LISTMOUNT_REVERSE)); 6020 6085 if (ret <= 0) ··· 6097 6162 { 6098 6163 if (!refcount_dec_and_test(&ns->ns.count)) 6099 6164 return; 6100 - namespace_lock(); 6165 + guard(namespace_excl)(); 6101 6166 emptied_ns = ns; 6102 - lock_mount_hash(); 6167 + guard(mount_writer)(); 6103 6168 umount_tree(ns->root, 0); 6104 - unlock_mount_hash(); 6105 - namespace_unlock(); 6106 6169 } 6107 6170 6108 6171 struct vfsmount *kern_mount(struct file_system_type *type) ··· 6149 6216 bool current_chrooted(void) 6150 6217 { 6151 6218 /* Does the current process have a non-standard root */ 6152 - struct path ns_root; 6153 - struct path fs_root; 6154 - bool chrooted; 6155 - 6156 - /* Find the namespace root */ 6157 - ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt; 6158 - ns_root.dentry = ns_root.mnt->mnt_root; 6159 - path_get(&ns_root); 6160 - while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) 6161 - ; 6219 + struct path fs_root __free(path_put) = {}; 6220 + struct mount *root; 6162 6221 6163 6222 get_fs_root(current->fs, &fs_root); 6164 6223 6165 - chrooted = !path_equal(&fs_root, &ns_root); 6224 + /* Find the namespace root */ 6166 6225 6167 - path_put(&fs_root); 6168 - path_put(&ns_root); 6226 + guard(mount_locked_reader)(); 6169 6227 6170 - return chrooted; 6228 + root = topmost_overmount(current->nsproxy->mnt_ns->root); 6229 + 6230 + return fs_root.mnt != &root->mnt || !path_mounted(&fs_root); 6171 6231 } 6172 6232 6173 6233 static bool mnt_already_visible(struct mnt_namespace *ns, ··· 6169 6243 { 6170 6244 int new_flags = *new_mnt_flags; 6171 6245 struct mount *mnt, *n; 6172 - bool visible = false; 6173 6246 6174 - down_read(&namespace_sem); 6247 + guard(namespace_shared)(); 6175 6248 rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) { 6176 6249 struct mount *child; 6177 6250 int mnt_flags; ··· 6217 6292 /* Preserve the locked attributes */ 6218 6293 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \ 6219 6294 MNT_LOCK_ATIME); 6220 - visible = true; 6221 - goto found; 6295 + return true; 6222 6296 next: ; 6223 6297 } 6224 - found: 6225 - up_read(&namespace_sem); 6226 - return visible; 6298 + return false; 6227 6299 } 6228 6300 6229 6301 static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)

+1 -1

fs/nfs/localio.c

··· 529 529 } 530 530 531 531 /* Factored out from fs/nfsd/vfs.h:fh_getattr() */ 532 - static int __vfs_getattr(struct path *p, struct kstat *stat, int version) 532 + static int __vfs_getattr(const struct path *p, struct kstat *stat, int version) 533 533 { 534 534 u32 request_mask = STATX_BASIC_STATS; 535 535

+2 -2

fs/nfsd/export.c

··· 402 402 struct svc_export *old); 403 403 static struct svc_export *svc_export_lookup(struct svc_export *); 404 404 405 - static int check_export(struct path *path, int *flags, unsigned char *uuid) 405 + static int check_export(const struct path *path, int *flags, unsigned char *uuid) 406 406 { 407 407 struct inode *inode = d_inode(path->dentry); 408 408 ··· 1181 1181 * use exp_get_by_name() or exp_find(). 1182 1182 */ 1183 1183 struct svc_export * 1184 - rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path) 1184 + rqst_exp_get_by_name(struct svc_rqst *rqstp, const struct path *path) 1185 1185 { 1186 1186 struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); 1187 1187 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

+1 -1

fs/nfsd/export.h

··· 111 111 void nfsd_export_shutdown(struct net *); 112 112 void nfsd_export_flush(struct net *); 113 113 struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, 114 - struct path *); 114 + const struct path *); 115 115 struct svc_export * rqst_exp_parent(struct svc_rqst *, 116 116 struct path *); 117 117 struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *);

+1 -1

fs/overlayfs/copy_up.c

··· 242 242 return 0; 243 243 } 244 244 245 - static int ovl_sync_file(struct path *path) 245 + static int ovl_sync_file(const struct path *path) 246 246 { 247 247 struct file *new_file; 248 248 int err;

+1 -1

fs/overlayfs/file.c

··· 120 120 } 121 121 122 122 static struct file *ovl_real_file_path(const struct file *file, 123 - struct path *realpath) 123 + const struct path *realpath) 124 124 { 125 125 struct ovl_file *of = file->private_data; 126 126 struct file *realfile = of->realfile;

+4 -4

fs/overlayfs/overlayfs.h

··· 563 563 struct ovl_metacopy *metacopy); 564 564 bool ovl_is_metacopy_dentry(struct dentry *dentry); 565 565 char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding); 566 - int ovl_ensure_verity_loaded(struct path *path); 566 + int ovl_ensure_verity_loaded(const struct path *path); 567 567 int ovl_validate_verity(struct ovl_fs *ofs, 568 - struct path *metapath, 569 - struct path *datapath); 570 - int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src, 568 + const struct path *metapath, 569 + const struct path *datapath); 570 + int ovl_get_verity_digest(struct ovl_fs *ofs, const struct path *src, 571 571 struct ovl_metacopy *metacopy); 572 572 int ovl_sync_status(struct ovl_fs *ofs); 573 573

+1 -1

fs/overlayfs/super.c

··· 394 394 return err; 395 395 } 396 396 397 - static int ovl_lower_dir(const char *name, struct path *path, 397 + static int ovl_lower_dir(const char *name, const struct path *path, 398 398 struct ovl_fs *ofs, int *stack_depth) 399 399 { 400 400 int fh_type;

+4 -4

fs/overlayfs/util.c

··· 1381 1381 } 1382 1382 1383 1383 /* Call with mounter creds as it may open the file */ 1384 - int ovl_ensure_verity_loaded(struct path *datapath) 1384 + int ovl_ensure_verity_loaded(const struct path *datapath) 1385 1385 { 1386 1386 struct inode *inode = d_inode(datapath->dentry); 1387 1387 struct file *filp; ··· 1401 1401 } 1402 1402 1403 1403 int ovl_validate_verity(struct ovl_fs *ofs, 1404 - struct path *metapath, 1405 - struct path *datapath) 1404 + const struct path *metapath, 1405 + const struct path *datapath) 1406 1406 { 1407 1407 struct ovl_metacopy metacopy_data; 1408 1408 u8 actual_digest[FS_VERITY_MAX_DIGEST_SIZE]; ··· 1455 1455 return 0; 1456 1456 } 1457 1457 1458 - int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src, 1458 + int ovl_get_verity_digest(struct ovl_fs *ofs, const struct path *src, 1459 1459 struct ovl_metacopy *metacopy) 1460 1460 { 1461 1461 int err, digest_size;

+1 -1

fs/pidfs.c

··· 847 847 return 0; 848 848 } 849 849 850 - static struct file *pidfs_export_open(struct path *path, unsigned int oflags) 850 + static struct file *pidfs_export_open(const struct path *path, unsigned int oflags) 851 851 { 852 852 /* 853 853 * Clear O_LARGEFILE as open_by_handle_at() forces it and raise

+56 -19

fs/pnode.c

··· 29 29 return hlist_entry(p->mnt_slave.next, struct mount, mnt_slave); 30 30 } 31 31 32 + /* locks: namespace_shared && is_mounted(mnt) */ 32 33 static struct mount *get_peer_under_root(struct mount *mnt, 33 34 struct mnt_namespace *ns, 34 35 const struct path *root) ··· 51 50 * Get ID of closest dominating peer group having a representative 52 51 * under the given root. 53 52 * 54 - * Caller must hold namespace_sem 53 + * locks: namespace_shared 55 54 */ 56 55 int get_dominating_id(struct mount *mnt, const struct path *root) 57 56 { ··· 69 68 static inline bool will_be_unmounted(struct mount *m) 70 69 { 71 70 return m->mnt.mnt_flags & MNT_UMOUNT; 72 - } 73 - 74 - static struct mount *propagation_source(struct mount *mnt) 75 - { 76 - do { 77 - struct mount *m; 78 - for (m = next_peer(mnt); m != mnt; m = next_peer(m)) { 79 - if (!will_be_unmounted(m)) 80 - return m; 81 - } 82 - mnt = mnt->mnt_master; 83 - } while (mnt && will_be_unmounted(mnt)); 84 - return mnt; 85 71 } 86 72 87 73 static void transfer_propagation(struct mount *mnt, struct mount *to) ··· 99 111 return; 100 112 } 101 113 if (IS_MNT_SHARED(mnt)) { 102 - if (type == MS_SLAVE || !hlist_empty(&mnt->mnt_slave_list)) 103 - m = propagation_source(mnt); 104 114 if (list_empty(&mnt->mnt_share)) { 105 115 mnt_release_group_id(mnt); 106 116 } else { 117 + m = next_peer(mnt); 107 118 list_del_init(&mnt->mnt_share); 108 119 mnt->mnt_group_id = 0; 109 120 } ··· 120 133 mnt->mnt_t_flags |= T_UNBINDABLE; 121 134 else 122 135 mnt->mnt_t_flags &= ~T_UNBINDABLE; 136 + } 137 + } 138 + 139 + static struct mount *trace_transfers(struct mount *m) 140 + { 141 + while (1) { 142 + struct mount *next = next_peer(m); 143 + 144 + if (next != m) { 145 + list_del_init(&m->mnt_share); 146 + m->mnt_group_id = 0; 147 + m->mnt_master = next; 148 + } else { 149 + if (IS_MNT_SHARED(m)) 150 + mnt_release_group_id(m); 151 + next = m->mnt_master; 152 + } 153 + hlist_del_init(&m->mnt_slave); 154 + CLEAR_MNT_SHARED(m); 155 + SET_MNT_MARK(m); 156 + 157 + if (!next || !will_be_unmounted(next)) 158 + return next; 159 + if (IS_MNT_MARKED(next)) 160 + return next->mnt_master; 161 + m = next; 162 + } 163 + } 164 + 165 + static void set_destinations(struct mount *m, struct mount *master) 166 + { 167 + struct mount *next; 168 + 169 + while ((next = m->mnt_master) != master) { 170 + m->mnt_master = master; 171 + m = next; 172 + } 173 + } 174 + 175 + void bulk_make_private(struct list_head *set) 176 + { 177 + struct mount *m; 178 + 179 + list_for_each_entry(m, set, mnt_list) 180 + if (!IS_MNT_MARKED(m)) 181 + set_destinations(m, trace_transfers(m)); 182 + 183 + list_for_each_entry(m, set, mnt_list) { 184 + transfer_propagation(m, m->mnt_master); 185 + m->mnt_master = NULL; 186 + CLEAR_MNT_MARK(m); 123 187 } 124 188 } 125 189 ··· 342 304 err = PTR_ERR(this); 343 305 break; 344 306 } 345 - read_seqlock_excl(&mount_lock); 346 - mnt_set_mountpoint(n, dest_mp, this); 347 - read_sequnlock_excl(&mount_lock); 307 + scoped_guard(mount_locked_reader) 308 + mnt_set_mountpoint(n, dest_mp, this); 348 309 if (n->mnt_master) 349 310 SET_MNT_MARK(n->mnt_master); 350 311 copy = this;

+1

fs/pnode.h

··· 42 42 } 43 43 44 44 void change_mnt_propagation(struct mount *, int); 45 + void bulk_make_private(struct list_head *); 45 46 int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, 46 47 struct hlist_head *); 47 48 void propagate_umount(struct list_head *);

+4 -4

fs/smb/server/vfs.c

··· 72 72 { 73 73 struct qstr last; 74 74 struct filename *filename __free(putname) = NULL; 75 - struct path *root_share_path = &share_conf->vfs_path; 75 + const struct path *root_share_path = &share_conf->vfs_path; 76 76 int err, type; 77 77 struct dentry *d; 78 78 ··· 1306 1306 caseless, true); 1307 1307 } 1308 1308 1309 - void ksmbd_vfs_kern_path_unlock(struct path *path) 1309 + void ksmbd_vfs_kern_path_unlock(const struct path *path) 1310 1310 { 1311 1311 /* While lock is still held, ->d_parent is safe */ 1312 1312 inode_unlock(d_inode(path->dentry->d_parent)); ··· 1856 1856 } 1857 1857 1858 1858 int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, 1859 - struct path *path) 1859 + const struct path *path) 1860 1860 { 1861 1861 struct posix_acl_state acl_state; 1862 1862 struct posix_acl *acls; ··· 1909 1909 } 1910 1910 1911 1911 int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, 1912 - struct path *path, struct inode *parent_inode) 1912 + const struct path *path, struct inode *parent_inode) 1913 1913 { 1914 1914 struct posix_acl *acls; 1915 1915 struct posix_acl_entry *pace;

+3 -3

fs/smb/server/vfs.h

··· 123 123 int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, 124 124 unsigned int flags, 125 125 struct path *path, bool caseless); 126 - void ksmbd_vfs_kern_path_unlock(struct path *path); 126 + void ksmbd_vfs_kern_path_unlock(const struct path *path); 127 127 struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, 128 128 const char *name, 129 129 unsigned int flags, ··· 164 164 struct dentry *dentry, 165 165 struct xattr_dos_attrib *da); 166 166 int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, 167 - struct path *path); 167 + const struct path *path); 168 168 int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, 169 - struct path *path, 169 + const struct path *path, 170 170 struct inode *parent_inode); 171 171 #endif /* __KSMBD_VFS_H__ */

+1 -1

fs/stat.c

··· 293 293 return lookup_flags; 294 294 } 295 295 296 - static int vfs_statx_path(struct path *path, int flags, struct kstat *stat, 296 + static int vfs_statx_path(const struct path *path, int flags, struct kstat *stat, 297 297 u32 request_mask) 298 298 { 299 299 int error = vfs_getattr(path, stat, request_mask, flags);

+1 -2

fs/super.c

··· 323 323 if (!s) 324 324 return NULL; 325 325 326 - INIT_LIST_HEAD(&s->s_mounts); 327 326 s->s_user_ns = get_user_ns(user_ns); 328 327 init_rwsem(&s->s_umount); 329 328 lockdep_set_class(&s->s_umount, &type->s_umount_key); ··· 407 408 list_del_init(&s->s_list); 408 409 WARN_ON(s->s_dentry_lru.node); 409 410 WARN_ON(s->s_inode_lru.node); 410 - WARN_ON(!list_empty(&s->s_mounts)); 411 + WARN_ON(s->s_mounts); 411 412 call_rcu(&s->rcu, destroy_super_rcu); 412 413 } 413 414 }

+1 -1

include/linux/exportfs.h

··· 270 270 int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, 271 271 int nr_iomaps, struct iattr *iattr); 272 272 int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); 273 - struct file * (*open)(struct path *path, unsigned int oflags); 273 + struct file * (*open)(const struct path *path, unsigned int oflags); 274 274 #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ 275 275 #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ 276 276 #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */

+4 -2

include/linux/fs.h

··· 1324 1324 struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; 1325 1325 }; 1326 1326 1327 + struct mount; 1328 + 1327 1329 struct super_block { 1328 1330 struct list_head s_list; /* Keep this first */ 1329 1331 dev_t s_dev; /* search index; _not_ kdev_t */ ··· 1360 1358 __u16 s_encoding_flags; 1361 1359 #endif 1362 1360 struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ 1363 - struct list_head s_mounts; /* list of mounts; _not_ for fs use */ 1361 + struct mount *s_mounts; /* list of mounts; _not_ for fs use */ 1364 1362 struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ 1365 1363 struct file *s_bdev_file; 1366 1364 struct backing_dev_info *s_bdi; ··· 2881 2879 const struct cred *cred); 2882 2880 struct file *dentry_create(const struct path *path, int flags, umode_t mode, 2883 2881 const struct cred *cred); 2884 - struct path *backing_file_user_path(const struct file *f); 2882 + const struct path *backing_file_user_path(const struct file *f); 2885 2883 2886 2884 /* 2887 2885 * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file

+4 -5

include/linux/mount.h

··· 33 33 MNT_NOSYMFOLLOW = 0x80, 34 34 35 35 MNT_SHRINKABLE = 0x100, 36 - MNT_WRITE_HOLD = 0x200, 37 36 38 37 MNT_INTERNAL = 0x4000, 39 38 ··· 51 52 | MNT_READONLY | MNT_NOSYMFOLLOW, 52 53 MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, 53 54 54 - MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | 55 + MNT_INTERNAL_FLAGS = MNT_INTERNAL | MNT_DOOMED | 55 56 MNT_SYNC_UMOUNT | MNT_LOCKED 56 57 }; 57 58 ··· 76 77 extern struct vfsmount *mntget(struct vfsmount *mnt); 77 78 extern void mnt_make_shortterm(struct vfsmount *mnt); 78 79 extern struct vfsmount *mnt_clone_internal(const struct path *path); 79 - extern bool __mnt_is_readonly(struct vfsmount *mnt); 80 + extern bool __mnt_is_readonly(const struct vfsmount *mnt); 80 81 extern bool mnt_may_suid(struct vfsmount *mnt); 81 82 82 83 extern struct vfsmount *clone_private_mount(const struct path *path); ··· 103 104 extern int may_umount(struct vfsmount *); 104 105 int do_mount(const char *, const char __user *, 105 106 const char *, unsigned long, void *); 106 - extern struct path *collect_paths(const struct path *, struct path *, unsigned); 107 - extern void drop_collected_paths(struct path *, struct path *); 107 + extern const struct path *collect_paths(const struct path *, struct path *, unsigned); 108 + extern void drop_collected_paths(const struct path *, const struct path *); 108 109 extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); 109 110 110 111 extern int cifs_root_data(char **dev, char **opts);

+1 -1

include/linux/namei.h

··· 60 60 61 61 extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); 62 62 extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); 63 - extern void done_path_create(struct path *, struct dentry *); 63 + extern void done_path_create(const struct path *, struct dentry *); 64 64 extern struct dentry *kern_path_locked(const char *, struct path *); 65 65 extern struct dentry *kern_path_locked_negative(const char *, struct path *); 66 66 extern struct dentry *user_path_locked_at(int , const char __user *, struct path *);

+6 -6

kernel/audit_tree.c

··· 678 678 struct audit_tree *tree; 679 679 struct path path; 680 680 struct audit_node *node; 681 - struct path *paths; 681 + const struct path *paths; 682 682 struct path array[16]; 683 683 int err; 684 684 ··· 701 701 struct audit_chunk *chunk = find_chunk(node); 702 702 /* this could be NULL if the watch is dying else where... */ 703 703 node->index |= 1U<<31; 704 - for (struct path *p = paths; p->dentry; p++) { 704 + for (const struct path *p = paths; p->dentry; p++) { 705 705 struct inode *inode = p->dentry->d_inode; 706 706 if (inode_to_key(inode) == chunk->key) { 707 707 node->index &= ~(1U<<31); ··· 740 740 put_tree(tree); 741 741 } 742 742 743 - static int tag_mounts(struct path *paths, struct audit_tree *tree) 743 + static int tag_mounts(const struct path *paths, struct audit_tree *tree) 744 744 { 745 - for (struct path *p = paths; p->dentry; p++) { 745 + for (const struct path *p = paths; p->dentry; p++) { 746 746 int err = tag_chunk(p->dentry->d_inode, tree); 747 747 if (err) 748 748 return err; ··· 805 805 struct audit_tree *seed = rule->tree, *tree; 806 806 struct path path; 807 807 struct path array[16]; 808 - struct path *paths; 808 + const struct path *paths; 809 809 int err; 810 810 811 811 rule->tree = NULL; ··· 877 877 int failed = 0; 878 878 struct path path1, path2; 879 879 struct path array[16]; 880 - struct path *paths; 880 + const struct path *paths; 881 881 int err; 882 882 883 883 err = kern_path(new, 0, &path2);

+1 -1

kernel/trace/bpf_trace.c

··· 900 900 .arg1_type = ARG_ANYTHING, 901 901 }; 902 902 903 - BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz) 903 + BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz) 904 904 { 905 905 struct path copy; 906 906 long len;

+7 -7

security/apparmor/af_unix.c

··· 31 31 } 32 32 33 33 static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred, 34 - struct aa_label *label, struct path *path) 34 + struct aa_label *label, const struct path *path) 35 35 { 36 36 AA_BUG(!label); 37 37 AA_BUG(!path); ··· 224 224 225 225 static int profile_sk_perm(struct aa_profile *profile, 226 226 struct apparmor_audit_data *ad, 227 - u32 request, struct sock *sk, struct path *path) 227 + u32 request, struct sock *sk, const struct path *path) 228 228 { 229 229 struct aa_ruleset *rules = profile->label.rules[0]; 230 230 struct aa_perms *p = NULL; ··· 386 386 387 387 /* null peer_label is allowed, in which case the peer_sk label is used */ 388 388 static int profile_peer_perm(struct aa_profile *profile, u32 request, 389 - struct sock *sk, struct path *path, 389 + struct sock *sk, const struct path *path, 390 390 struct sockaddr_un *peer_addr, 391 - int peer_addrlen, struct path *peer_path, 391 + int peer_addrlen, const struct path *peer_path, 392 392 struct aa_label *peer_label, 393 393 struct apparmor_audit_data *ad) 394 394 { ··· 445 445 static int aa_unix_label_sk_perm(const struct cred *subj_cred, 446 446 struct aa_label *label, 447 447 const char *op, u32 request, struct sock *sk, 448 - struct path *path) 448 + const struct path *path) 449 449 { 450 450 if (!unconfined(label)) { 451 451 struct aa_profile *profile; ··· 599 599 600 600 static int unix_peer_perm(const struct cred *subj_cred, 601 601 struct aa_label *label, const char *op, u32 request, 602 - struct sock *sk, struct path *path, 602 + struct sock *sk, const struct path *path, 603 603 struct sockaddr_un *peer_addr, int peer_addrlen, 604 - struct path *peer_path, struct aa_label *peer_label) 604 + const struct path *peer_path, struct aa_label *peer_label) 605 605 { 606 606 struct aa_profile *profile; 607 607 DEFINE_AUDIT_SK(ad, op, subj_cred, sk);

+1 -1

tools/testing/selftests/bpf/bpf_experimental.h

··· 219 219 * including the NULL termination character, stored in the supplied 220 220 * buffer. On error, a negative integer is returned. 221 221 */ 222 - extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __ksym; 222 + extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym; 223 223 224 224 /* This macro must be used to mark the exception callback corresponding to the 225 225 * main program. For example:

Configure Feed

Configure Feed