Merge tag 'vfs-6.15-rc1.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

+1

arch/alpha/kernel/syscalls/syscall.tbl

··· 506 506 574 common getxattrat sys_getxattrat 507 507 575 common listxattrat sys_listxattrat 508 508 576 common removexattrat sys_removexattrat 509 + 577 common open_tree_attr sys_open_tree_attr

+1

arch/arm/tools/syscall.tbl

··· 481 481 464 common getxattrat sys_getxattrat 482 482 465 common listxattrat sys_listxattrat 483 483 466 common removexattrat sys_removexattrat 484 + 467 common open_tree_attr sys_open_tree_attr

+1

arch/arm64/tools/syscall_32.tbl

··· 478 478 464 common getxattrat sys_getxattrat 479 479 465 common listxattrat sys_listxattrat 480 480 466 common removexattrat sys_removexattrat 481 + 467 common open_tree_attr sys_open_tree_attr

+1

arch/m68k/kernel/syscalls/syscall.tbl

··· 466 466 464 common getxattrat sys_getxattrat 467 467 465 common listxattrat sys_listxattrat 468 468 466 common removexattrat sys_removexattrat 469 + 467 common open_tree_attr sys_open_tree_attr

+1

arch/microblaze/kernel/syscalls/syscall.tbl

··· 472 472 464 common getxattrat sys_getxattrat 473 473 465 common listxattrat sys_listxattrat 474 474 466 common removexattrat sys_removexattrat 475 + 467 common open_tree_attr sys_open_tree_attr

+1

arch/mips/kernel/syscalls/syscall_n32.tbl

··· 405 405 464 n32 getxattrat sys_getxattrat 406 406 465 n32 listxattrat sys_listxattrat 407 407 466 n32 removexattrat sys_removexattrat 408 + 467 n32 open_tree_attr sys_open_tree_attr

+1

arch/mips/kernel/syscalls/syscall_n64.tbl

··· 381 381 464 n64 getxattrat sys_getxattrat 382 382 465 n64 listxattrat sys_listxattrat 383 383 466 n64 removexattrat sys_removexattrat 384 + 467 n64 open_tree_attr sys_open_tree_attr

+1

arch/mips/kernel/syscalls/syscall_o32.tbl

··· 454 454 464 o32 getxattrat sys_getxattrat 455 455 465 o32 listxattrat sys_listxattrat 456 456 466 o32 removexattrat sys_removexattrat 457 + 467 o32 open_tree_attr sys_open_tree_attr

+1

arch/parisc/kernel/syscalls/syscall.tbl

··· 465 465 464 common getxattrat sys_getxattrat 466 466 465 common listxattrat sys_listxattrat 467 467 466 common removexattrat sys_removexattrat 468 + 467 common open_tree_attr sys_open_tree_attr

+1

arch/powerpc/kernel/syscalls/syscall.tbl

··· 557 557 464 common getxattrat sys_getxattrat 558 558 465 common listxattrat sys_listxattrat 559 559 466 common removexattrat sys_removexattrat 560 + 467 common open_tree_attr sys_open_tree_attr

+1

arch/s390/kernel/syscalls/syscall.tbl

··· 469 469 464 common getxattrat sys_getxattrat sys_getxattrat 470 470 465 common listxattrat sys_listxattrat sys_listxattrat 471 471 466 common removexattrat sys_removexattrat sys_removexattrat 472 + 467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr

+1

arch/sh/kernel/syscalls/syscall.tbl

··· 470 470 464 common getxattrat sys_getxattrat 471 471 465 common listxattrat sys_listxattrat 472 472 466 common removexattrat sys_removexattrat 473 + 467 common open_tree_attr sys_open_tree_attr

+1

arch/sparc/kernel/syscalls/syscall.tbl

··· 512 512 464 common getxattrat sys_getxattrat 513 513 465 common listxattrat sys_listxattrat 514 514 466 common removexattrat sys_removexattrat 515 + 467 common open_tree_attr sys_open_tree_attr

+1

arch/x86/entry/syscalls/syscall_32.tbl

··· 472 472 464 i386 getxattrat sys_getxattrat 473 473 465 i386 listxattrat sys_listxattrat 474 474 466 i386 removexattrat sys_removexattrat 475 + 467 i386 open_tree_attr sys_open_tree_attr

+1

arch/x86/entry/syscalls/syscall_64.tbl

··· 390 390 464 common getxattrat sys_getxattrat 391 391 465 common listxattrat sys_listxattrat 392 392 466 common removexattrat sys_removexattrat 393 + 467 common open_tree_attr sys_open_tree_attr 393 394 394 395 # 395 396 # Due to a historical design error, certain syscalls are numbered differently

+1

arch/xtensa/kernel/syscalls/syscall.tbl

··· 437 437 464 common getxattrat sys_getxattrat 438 438 465 common listxattrat sys_listxattrat 439 439 466 common removexattrat sys_removexattrat 440 + 467 common open_tree_attr sys_open_tree_attr

+2

fs/autofs/autofs_i.h

··· 218 218 219 219 static inline int autofs_check_pipe(struct file *pipe) 220 220 { 221 + if (pipe->f_mode & FMODE_PATH) 222 + return -EINVAL; 221 223 if (!(pipe->f_mode & FMODE_CAN_WRITE)) 222 224 return -EINVAL; 223 225 if (!S_ISFIFO(file_inode(pipe)->i_mode))

+1 -1

fs/fsopen.c

··· 453 453 case FSCONFIG_SET_FD: 454 454 param.type = fs_value_is_file; 455 455 ret = -EBADF; 456 - param.file = fget(aux); 456 + param.file = fget_raw(aux); 457 457 if (!param.file) 458 458 goto out_key; 459 459 param.dirfd = aux;

+1

fs/internal.h

··· 338 338 } 339 339 void file_f_owner_release(struct file *file); 340 340 bool file_seek_cur_needs_f_lock(struct file *file); 341 + int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);

+51

fs/mnt_idmapping.c

··· 6 6 #include <linux/mnt_idmapping.h> 7 7 #include <linux/slab.h> 8 8 #include <linux/user_namespace.h> 9 + #include <linux/seq_file.h> 9 10 10 11 #include "internal.h" 11 12 ··· 335 334 free_mnt_idmap(idmap); 336 335 } 337 336 EXPORT_SYMBOL_GPL(mnt_idmap_put); 337 + 338 + int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map) 339 + { 340 + struct uid_gid_map *map, *map_up; 341 + u32 idx, nr_mappings; 342 + 343 + if (!is_valid_mnt_idmap(idmap)) 344 + return 0; 345 + 346 + /* 347 + * Idmappings are shown relative to the caller's idmapping. 348 + * This is both the most intuitive and most useful solution. 349 + */ 350 + if (uid_map) { 351 + map = &idmap->uid_map; 352 + map_up = &current_user_ns()->uid_map; 353 + } else { 354 + map = &idmap->gid_map; 355 + map_up = &current_user_ns()->gid_map; 356 + } 357 + 358 + for (idx = 0, nr_mappings = 0; idx < map->nr_extents; idx++) { 359 + uid_t lower; 360 + struct uid_gid_extent *extent; 361 + 362 + if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) 363 + extent = &map->extent[idx]; 364 + else 365 + extent = &map->forward[idx]; 366 + 367 + /* 368 + * Verify that the whole range of the mapping can be 369 + * resolved in the caller's idmapping. If it cannot be 370 + * resolved skip the mapping. 371 + */ 372 + lower = map_id_range_up(map_up, extent->lower_first, extent->count); 373 + if (lower == (uid_t) -1) 374 + continue; 375 + 376 + seq_printf(seq, "%u %u %u", extent->first, lower, extent->count); 377 + 378 + seq->count++; /* mappings are separated by \0 */ 379 + if (seq_has_overflowed(seq)) 380 + return -EAGAIN; 381 + 382 + nr_mappings++; 383 + } 384 + 385 + return nr_mappings; 386 + }

+26

fs/mount.h

··· 5 5 #include <linux/ns_common.h> 6 6 #include <linux/fs_pin.h> 7 7 8 + extern struct list_head notify_list; 9 + 8 10 struct mnt_namespace { 9 11 struct ns_common ns; 10 12 struct mount * root; ··· 23 21 struct rcu_head mnt_ns_rcu; 24 22 }; 25 23 u64 event; 24 + #ifdef CONFIG_FSNOTIFY 25 + __u32 n_fsnotify_mask; 26 + struct fsnotify_mark_connector __rcu *n_fsnotify_marks; 27 + #endif 26 28 unsigned int nr_mounts; /* # of mounts in the namespace */ 27 29 unsigned int pending_mounts; 28 30 struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ ··· 82 76 #ifdef CONFIG_FSNOTIFY 83 77 struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; 84 78 __u32 mnt_fsnotify_mask; 79 + struct list_head to_notify; /* need to queue notification */ 80 + struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */ 85 81 #endif 86 82 int mnt_id; /* mount identifier, reused */ 87 83 u64 mnt_id_unique; /* mount ID unique until reboot */ ··· 185 177 { 186 178 return container_of(ns, struct mnt_namespace, ns); 187 179 } 180 + 181 + #ifdef CONFIG_FSNOTIFY 182 + static inline void mnt_notify_add(struct mount *m) 183 + { 184 + /* Optimize the case where there are no watches */ 185 + if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) || 186 + (m->prev_ns && m->prev_ns->n_fsnotify_marks)) 187 + list_add_tail(&m->to_notify, &notify_list); 188 + else 189 + m->prev_ns = m->mnt_ns; 190 + } 191 + #else 192 + static inline void mnt_notify_add(struct mount *m) 193 + { 194 + } 195 + #endif 196 + 197 + struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);

+363 -122

fs/namespace.c

··· 81 81 static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ 82 82 static DEFINE_SEQLOCK(mnt_ns_tree_lock); 83 83 84 + #ifdef CONFIG_FSNOTIFY 85 + LIST_HEAD(notify_list); /* protected by namespace_sem */ 86 + #endif 84 87 static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */ 85 88 static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */ 89 + 90 + enum mount_kattr_flags_t { 91 + MOUNT_KATTR_RECURSE = (1 << 0), 92 + MOUNT_KATTR_IDMAP_REPLACE = (1 << 1), 93 + }; 86 94 87 95 struct mount_kattr { 88 96 unsigned int attr_set; 89 97 unsigned int attr_clr; 90 98 unsigned int propagation; 91 99 unsigned int lookup_flags; 92 - bool recurse; 100 + enum mount_kattr_flags_t kflags; 93 101 struct user_namespace *mnt_userns; 94 102 struct mnt_idmap *mnt_idmap; 95 103 }; ··· 171 163 { 172 164 /* keep alive for {list,stat}mount() */ 173 165 if (refcount_dec_and_test(&ns->passive)) { 166 + fsnotify_mntns_delete(ns); 174 167 put_user_ns(ns->user_ns); 175 168 kfree(ns); 176 169 } ··· 1185 1176 ns->mnt_first_node = &mnt->mnt_node; 1186 1177 rb_link_node(&mnt->mnt_node, parent, link); 1187 1178 rb_insert_color(&mnt->mnt_node, &ns->mounts); 1179 + 1180 + mnt_notify_add(mnt); 1188 1181 } 1189 1182 1190 1183 /* ··· 1734 1723 1735 1724 EXPORT_SYMBOL(may_umount); 1736 1725 1726 + #ifdef CONFIG_FSNOTIFY 1727 + static void mnt_notify(struct mount *p) 1728 + { 1729 + if (!p->prev_ns && p->mnt_ns) { 1730 + fsnotify_mnt_attach(p->mnt_ns, &p->mnt); 1731 + } else if (p->prev_ns && !p->mnt_ns) { 1732 + fsnotify_mnt_detach(p->prev_ns, &p->mnt); 1733 + } else if (p->prev_ns == p->mnt_ns) { 1734 + fsnotify_mnt_move(p->mnt_ns, &p->mnt); 1735 + } else { 1736 + fsnotify_mnt_detach(p->prev_ns, &p->mnt); 1737 + fsnotify_mnt_attach(p->mnt_ns, &p->mnt); 1738 + } 1739 + p->prev_ns = p->mnt_ns; 1740 + } 1741 + 1742 + static void notify_mnt_list(void) 1743 + { 1744 + struct mount *m, *tmp; 1745 + /* 1746 + * Notify about mounts that were 
added/reparented/detached/remain 1747 + * connected after unmount. 1748 + */ 1749 + list_for_each_entry_safe(m, tmp, &notify_list, to_notify) { 1750 + mnt_notify(m); 1751 + list_del_init(&m->to_notify); 1752 + } 1753 + } 1754 + 1755 + static bool need_notify_mnt_list(void) 1756 + { 1757 + return !list_empty(&notify_list); 1758 + } 1759 + #else 1760 + static void notify_mnt_list(void) 1761 + { 1762 + } 1763 + 1764 + static bool need_notify_mnt_list(void) 1765 + { 1766 + return false; 1767 + } 1768 + #endif 1769 + 1737 1770 static void namespace_unlock(void) 1738 1771 { 1739 1772 struct hlist_head head; ··· 1788 1733 hlist_move_list(&unmounted, &head); 1789 1734 list_splice_init(&ex_mountpoints, &list); 1790 1735 1791 - up_write(&namespace_sem); 1736 + if (need_notify_mnt_list()) { 1737 + /* 1738 + * No point blocking out concurrent readers while notifications 1739 + * are sent. This will also allow statmount()/listmount() to run 1740 + * concurrently. 1741 + */ 1742 + downgrade_write(&namespace_sem); 1743 + notify_mnt_list(); 1744 + up_read(&namespace_sem); 1745 + } else { 1746 + up_write(&namespace_sem); 1747 + } 1792 1748 1793 1749 shrink_dentry_list(&list); 1794 1750 ··· 1912 1846 change_mnt_propagation(p, MS_PRIVATE); 1913 1847 if (disconnect) 1914 1848 hlist_add_head(&p->mnt_umount, &unmounted); 1849 + 1850 + /* 1851 + * At this point p->mnt_ns is NULL, notification will be queued 1852 + * only if 1853 + * 1854 + * - p->prev_ns is non-NULL *and* 1855 + * - p->prev_ns->n_fsnotify_marks is non-NULL 1856 + * 1857 + * This will preclude queuing the mount if this is a cleanup 1858 + * after a failed copy_tree() or destruction of an anonymous 1859 + * namespace, etc. 
1860 + */ 1861 + mnt_notify_add(p); 1915 1862 } 1916 1863 } 1917 1864 ··· 2105 2026 static int can_umount(const struct path *path, int flags) 2106 2027 { 2107 2028 struct mount *mnt = real_mount(path->mnt); 2029 + struct super_block *sb = path->dentry->d_sb; 2108 2030 2109 2031 if (!may_mount()) 2110 2032 return -EPERM; ··· 2115 2035 return -EINVAL; 2116 2036 if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ 2117 2037 return -EINVAL; 2118 - if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) 2038 + if (flags & MNT_FORCE && !ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) 2119 2039 return -EPERM; 2120 2040 return 0; 2121 2041 } ··· 2225 2145 } 2226 2146 } 2227 2147 2148 + struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry) 2149 + { 2150 + if (!is_mnt_ns_file(dentry)) 2151 + return NULL; 2152 + 2153 + return to_mnt_ns(get_proc_ns(dentry->d_inode)); 2154 + } 2155 + 2228 2156 static bool mnt_ns_loop(struct dentry *dentry) 2229 2157 { 2230 2158 /* Could bind mounting the mount namespace inode cause a 2231 2159 * mount namespace loop? 2232 2160 */ 2233 - struct mnt_namespace *mnt_ns; 2234 - if (!is_mnt_ns_file(dentry)) 2161 + struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry); 2162 + 2163 + if (!mnt_ns) 2235 2164 return false; 2236 2165 2237 - mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)); 2238 2166 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; 2239 2167 } 2240 2168 ··· 2375 2287 return false; 2376 2288 } 2377 2289 2290 + /* 2291 + * Check that there aren't references to earlier/same mount namespaces in the 2292 + * specified subtree. Such references can act as pins for mount namespaces 2293 + * that aren't checked by the mount-cycle checking code, thereby allowing 2294 + * cycles to be made. 
2295 + */ 2296 + static bool check_for_nsfs_mounts(struct mount *subtree) 2297 + { 2298 + struct mount *p; 2299 + bool ret = false; 2300 + 2301 + lock_mount_hash(); 2302 + for (p = subtree; p; p = next_mnt(p, subtree)) 2303 + if (mnt_ns_loop(p->mnt.mnt_root)) 2304 + goto out; 2305 + 2306 + ret = true; 2307 + out: 2308 + unlock_mount_hash(); 2309 + return ret; 2310 + } 2311 + 2378 2312 /** 2379 2313 * clone_private_mount - create a private clone of a path 2380 2314 * @path: path to clone ··· 2405 2295 * will not be attached anywhere in the namespace and will be private (i.e. 2406 2296 * changes to the originating mount won't be propagated into this). 2407 2297 * 2298 + * This assumes caller has called or done the equivalent of may_mount(). 2299 + * 2408 2300 * Release with mntput(). 2409 2301 */ 2410 2302 struct vfsmount *clone_private_mount(const struct path *path) ··· 2414 2302 struct mount *old_mnt = real_mount(path->mnt); 2415 2303 struct mount *new_mnt; 2416 2304 2417 - down_read(&namespace_sem); 2305 + scoped_guard(rwsem_read, &namespace_sem) 2418 2306 if (IS_MNT_UNBINDABLE(old_mnt)) 2419 - goto invalid; 2307 + return ERR_PTR(-EINVAL); 2420 2308 2421 - if (!check_mnt(old_mnt)) 2422 - goto invalid; 2309 + if (mnt_has_parent(old_mnt)) { 2310 + if (!check_mnt(old_mnt)) 2311 + return ERR_PTR(-EINVAL); 2312 + } else { 2313 + if (!is_mounted(&old_mnt->mnt)) 2314 + return ERR_PTR(-EINVAL); 2315 + 2316 + /* Make sure this isn't something purely kernel internal. */ 2317 + if (!is_anon_ns(old_mnt->mnt_ns)) 2318 + return ERR_PTR(-EINVAL); 2319 + 2320 + /* Make sure we don't create mount namespace loops. 
*/ 2321 + if (!check_for_nsfs_mounts(old_mnt)) 2322 + return ERR_PTR(-EINVAL); 2323 + } 2423 2324 2424 2325 if (has_locked_children(old_mnt, path->dentry)) 2425 - goto invalid; 2326 + return ERR_PTR(-EINVAL); 2426 2327 2427 2328 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); 2428 - up_read(&namespace_sem); 2429 - 2430 2329 if (IS_ERR(new_mnt)) 2431 - return ERR_CAST(new_mnt); 2330 + return ERR_PTR(-EINVAL); 2432 2331 2433 2332 /* Longterm mount to be removed by kern_unmount*() */ 2434 2333 new_mnt->mnt_ns = MNT_NS_INTERNAL; 2435 - 2436 2334 return &new_mnt->mnt; 2437 - 2438 - invalid: 2439 - up_read(&namespace_sem); 2440 - return ERR_PTR(-EINVAL); 2441 2335 } 2442 2336 EXPORT_SYMBOL_GPL(clone_private_mount); 2443 2337 ··· 2665 2547 dest_mp = smp; 2666 2548 unhash_mnt(source_mnt); 2667 2549 attach_mnt(source_mnt, top_mnt, dest_mp, beneath); 2550 + mnt_notify_add(source_mnt); 2668 2551 touch_mnt_namespace(source_mnt->mnt_ns); 2669 2552 } else { 2670 2553 if (source_mnt->mnt_ns) { ··· 3008 2889 return file; 3009 2890 } 3010 2891 3011 - SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) 2892 + static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned int flags) 3012 2893 { 3013 - struct file *file; 3014 - struct path path; 2894 + int ret; 2895 + struct path path __free(path_put) = {}; 3015 2896 int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; 3016 2897 bool detached = flags & OPEN_TREE_CLONE; 3017 - int error; 3018 - int fd; 3019 2898 3020 2899 BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC); 3021 2900 3022 2901 if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | 3023 2902 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE | 3024 2903 OPEN_TREE_CLOEXEC)) 3025 - return -EINVAL; 2904 + return ERR_PTR(-EINVAL); 3026 2905 3027 2906 if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE) 3028 - return -EINVAL; 2907 + return ERR_PTR(-EINVAL); 3029 2908 3030 2909 if (flags & AT_NO_AUTOMOUNT) 3031 2910 
lookup_flags &= ~LOOKUP_AUTOMOUNT; ··· 3033 2916 lookup_flags |= LOOKUP_EMPTY; 3034 2917 3035 2918 if (detached && !may_mount()) 3036 - return -EPERM; 2919 + return ERR_PTR(-EPERM); 2920 + 2921 + ret = user_path_at(dfd, filename, lookup_flags, &path); 2922 + if (unlikely(ret)) 2923 + return ERR_PTR(ret); 2924 + 2925 + if (detached) 2926 + return open_detached_copy(&path, flags & AT_RECURSIVE); 2927 + 2928 + return dentry_open(&path, O_PATH, current_cred()); 2929 + } 2930 + 2931 + SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) 2932 + { 2933 + int fd; 2934 + struct file *file __free(fput) = NULL; 2935 + 2936 + file = vfs_open_tree(dfd, filename, flags); 2937 + if (IS_ERR(file)) 2938 + return PTR_ERR(file); 3037 2939 3038 2940 fd = get_unused_fd_flags(flags & O_CLOEXEC); 3039 2941 if (fd < 0) 3040 2942 return fd; 3041 2943 3042 - error = user_path_at(dfd, filename, lookup_flags, &path); 3043 - if (unlikely(error)) { 3044 - file = ERR_PTR(error); 3045 - } else { 3046 - if (detached) 3047 - file = open_detached_copy(&path, flags & AT_RECURSIVE); 3048 - else 3049 - file = dentry_open(&path, O_PATH, current_cred()); 3050 - path_put(&path); 3051 - } 3052 - if (IS_ERR(file)) { 3053 - put_unused_fd(fd); 3054 - return PTR_ERR(file); 3055 - } 3056 - fd_install(fd, file); 2944 + fd_install(fd, no_free_ptr(file)); 3057 2945 return fd; 3058 2946 } 3059 2947 ··· 3243 3121 return 1; 3244 3122 } 3245 3123 return 0; 3246 - } 3247 - 3248 - /* 3249 - * Check that there aren't references to earlier/same mount namespaces in the 3250 - * specified subtree. Such references can act as pins for mount namespaces 3251 - * that aren't checked by the mount-cycle checking code, thereby allowing 3252 - * cycles to be made. 
3253 - */ 3254 - static bool check_for_nsfs_mounts(struct mount *subtree) 3255 - { 3256 - struct mount *p; 3257 - bool ret = false; 3258 - 3259 - lock_mount_hash(); 3260 - for (p = subtree; p; p = next_mnt(p, subtree)) 3261 - if (mnt_ns_loop(p->mnt.mnt_root)) 3262 - goto out; 3263 - 3264 - ret = true; 3265 - out: 3266 - unlock_mount_hash(); 3267 - return ret; 3268 3124 } 3269 3125 3270 3126 static int do_set_group(struct path *from_path, struct path *to_path) ··· 4568 4468 list_del_init(&new_mnt->mnt_expire); 4569 4469 put_mountpoint(root_mp); 4570 4470 unlock_mount_hash(); 4471 + mnt_notify_add(root_mnt); 4472 + mnt_notify_add(new_mnt); 4571 4473 chroot_fs_refs(&root, &new); 4572 4474 error = 0; 4573 4475 out4: ··· 4614 4512 return -EINVAL; 4615 4513 4616 4514 /* 4617 - * Once a mount has been idmapped we don't allow it to change its 4618 - * mapping. It makes things simpler and callers can just create 4619 - * another bind-mount they can idmap if they want to. 4515 + * We only allow an mount to change it's idmapping if it has 4516 + * never been accessible to userspace. 4620 4517 */ 4621 - if (is_idmapped_mnt(m)) 4518 + if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE) && is_idmapped_mnt(m)) 4622 4519 return -EPERM; 4623 4520 4624 4521 /* The underlying filesystem doesn't support idmapped mounts yet. */ ··· 4677 4576 break; 4678 4577 } 4679 4578 4680 - if (!kattr->recurse) 4579 + if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) 4681 4580 return 0; 4682 4581 } 4683 4582 ··· 4707 4606 4708 4607 static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) 4709 4608 { 4609 + struct mnt_idmap *old_idmap; 4610 + 4710 4611 if (!kattr->mnt_idmap) 4711 4612 return; 4712 4613 4713 - /* 4714 - * Pairs with smp_load_acquire() in mnt_idmap(). 4715 - * 4716 - * Since we only allow a mount to change the idmapping once and 4717 - * verified this in can_idmap_mount() we know that the mount has 4718 - * @nop_mnt_idmap attached to it. 
So there's no need to drop any 4719 - * references. 4720 - */ 4614 + old_idmap = mnt_idmap(&mnt->mnt); 4615 + 4616 + /* Pairs with smp_load_acquire() in mnt_idmap(). */ 4721 4617 smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap)); 4618 + mnt_idmap_put(old_idmap); 4722 4619 } 4723 4620 4724 4621 static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt) ··· 4736 4637 4737 4638 if (kattr->propagation) 4738 4639 change_mnt_propagation(m, kattr->propagation); 4739 - if (!kattr->recurse) 4640 + if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) 4740 4641 break; 4741 4642 } 4742 4643 touch_mnt_namespace(mnt->mnt_ns); ··· 4766 4667 */ 4767 4668 namespace_lock(); 4768 4669 if (kattr->propagation == MS_SHARED) { 4769 - err = invent_group_ids(mnt, kattr->recurse); 4670 + err = invent_group_ids(mnt, kattr->kflags & MOUNT_KATTR_RECURSE); 4770 4671 if (err) { 4771 4672 namespace_unlock(); 4772 4673 return err; ··· 4817 4718 } 4818 4719 4819 4720 static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, 4820 - struct mount_kattr *kattr, unsigned int flags) 4721 + struct mount_kattr *kattr) 4821 4722 { 4822 4723 struct ns_common *ns; 4823 4724 struct user_namespace *mnt_userns; ··· 4825 4726 if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP)) 4826 4727 return 0; 4827 4728 4828 - /* 4829 - * We currently do not support clearing an idmapped mount. If this ever 4830 - * is a use-case we can revisit this but for now let's keep it simple 4831 - * and not allow it. 4832 - */ 4833 - if (attr->attr_clr & MOUNT_ATTR_IDMAP) 4834 - return -EINVAL; 4729 + if (attr->attr_clr & MOUNT_ATTR_IDMAP) { 4730 + /* 4731 + * We can only remove an idmapping if it's never been 4732 + * exposed to userspace. 4733 + */ 4734 + if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE)) 4735 + return -EINVAL; 4736 + 4737 + /* 4738 + * Removal of idmappings is equivalent to setting 4739 + * nop_mnt_idmap. 
4740 + */ 4741 + if (!(attr->attr_set & MOUNT_ATTR_IDMAP)) { 4742 + kattr->mnt_idmap = &nop_mnt_idmap; 4743 + return 0; 4744 + } 4745 + } 4835 4746 4836 4747 if (attr->userns_fd > INT_MAX) 4837 4748 return -EINVAL; ··· 4878 4769 } 4879 4770 4880 4771 static int build_mount_kattr(const struct mount_attr *attr, size_t usize, 4881 - struct mount_kattr *kattr, unsigned int flags) 4772 + struct mount_kattr *kattr) 4882 4773 { 4883 - unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; 4884 - 4885 - if (flags & AT_NO_AUTOMOUNT) 4886 - lookup_flags &= ~LOOKUP_AUTOMOUNT; 4887 - if (flags & AT_SYMLINK_NOFOLLOW) 4888 - lookup_flags &= ~LOOKUP_FOLLOW; 4889 - if (flags & AT_EMPTY_PATH) 4890 - lookup_flags |= LOOKUP_EMPTY; 4891 - 4892 - *kattr = (struct mount_kattr) { 4893 - .lookup_flags = lookup_flags, 4894 - .recurse = !!(flags & AT_RECURSIVE), 4895 - }; 4896 - 4897 4774 if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS) 4898 4775 return -EINVAL; 4899 4776 if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1) ··· 4927 4832 return -EINVAL; 4928 4833 } 4929 4834 4930 - return build_mount_idmapped(attr, usize, kattr, flags); 4835 + return build_mount_idmapped(attr, usize, kattr); 4931 4836 } 4932 4837 4933 4838 static void finish_mount_kattr(struct mount_kattr *kattr) 4934 4839 { 4935 - put_user_ns(kattr->mnt_userns); 4936 - kattr->mnt_userns = NULL; 4840 + if (kattr->mnt_userns) { 4841 + put_user_ns(kattr->mnt_userns); 4842 + kattr->mnt_userns = NULL; 4843 + } 4937 4844 4938 4845 if (kattr->mnt_idmap) 4939 4846 mnt_idmap_put(kattr->mnt_idmap); 4940 4847 } 4941 4848 4942 - SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, 4943 - unsigned int, flags, struct mount_attr __user *, uattr, 4944 - size_t, usize) 4849 + static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize, 4850 + struct mount_kattr *kattr) 4945 4851 { 4946 - int err; 4947 - struct path target; 4852 + int ret; 4948 4853 struct mount_attr attr; 
4949 - struct mount_kattr kattr; 4950 4854 4951 4855 BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0); 4952 - 4953 - if (flags & ~(AT_EMPTY_PATH | 4954 - AT_RECURSIVE | 4955 - AT_SYMLINK_NOFOLLOW | 4956 - AT_NO_AUTOMOUNT)) 4957 - return -EINVAL; 4958 4856 4959 4857 if (unlikely(usize > PAGE_SIZE)) 4960 4858 return -E2BIG; ··· 4957 4869 if (!may_mount()) 4958 4870 return -EPERM; 4959 4871 4960 - err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize); 4961 - if (err) 4962 - return err; 4872 + ret = copy_struct_from_user(&attr, sizeof(attr), uattr, usize); 4873 + if (ret) 4874 + return ret; 4963 4875 4964 4876 /* Don't bother walking through the mounts if this is a nop. */ 4965 4877 if (attr.attr_set == 0 && ··· 4967 4879 attr.propagation == 0) 4968 4880 return 0; 4969 4881 4970 - err = build_mount_kattr(&attr, usize, &kattr, flags); 4882 + return build_mount_kattr(&attr, usize, kattr); 4883 + } 4884 + 4885 + SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, 4886 + unsigned int, flags, struct mount_attr __user *, uattr, 4887 + size_t, usize) 4888 + { 4889 + int err; 4890 + struct path target; 4891 + struct mount_kattr kattr; 4892 + unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; 4893 + 4894 + if (flags & ~(AT_EMPTY_PATH | 4895 + AT_RECURSIVE | 4896 + AT_SYMLINK_NOFOLLOW | 4897 + AT_NO_AUTOMOUNT)) 4898 + return -EINVAL; 4899 + 4900 + if (flags & AT_NO_AUTOMOUNT) 4901 + lookup_flags &= ~LOOKUP_AUTOMOUNT; 4902 + if (flags & AT_SYMLINK_NOFOLLOW) 4903 + lookup_flags &= ~LOOKUP_FOLLOW; 4904 + if (flags & AT_EMPTY_PATH) 4905 + lookup_flags |= LOOKUP_EMPTY; 4906 + 4907 + kattr = (struct mount_kattr) { 4908 + .lookup_flags = lookup_flags, 4909 + }; 4910 + 4911 + if (flags & AT_RECURSIVE) 4912 + kattr.kflags |= MOUNT_KATTR_RECURSE; 4913 + 4914 + err = copy_mount_setattr(uattr, usize, &kattr); 4971 4915 if (err) 4972 4916 return err; 4973 4917 ··· 5010 4890 } 5011 4891 finish_mount_kattr(&kattr); 5012 4892 return err; 4893 + 
} 4894 + 4895 + SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, 4896 + unsigned, flags, struct mount_attr __user *, uattr, 4897 + size_t, usize) 4898 + { 4899 + struct file __free(fput) *file = NULL; 4900 + int fd; 4901 + 4902 + if (!uattr && usize) 4903 + return -EINVAL; 4904 + 4905 + file = vfs_open_tree(dfd, filename, flags); 4906 + if (IS_ERR(file)) 4907 + return PTR_ERR(file); 4908 + 4909 + if (uattr) { 4910 + int ret; 4911 + struct mount_kattr kattr = {}; 4912 + 4913 + kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE; 4914 + if (flags & AT_RECURSIVE) 4915 + kattr.kflags |= MOUNT_KATTR_RECURSE; 4916 + 4917 + ret = copy_mount_setattr(uattr, usize, &kattr); 4918 + if (ret) 4919 + return ret; 4920 + 4921 + ret = do_mount_setattr(&file->f_path, &kattr); 4922 + if (ret) 4923 + return ret; 4924 + 4925 + finish_mount_kattr(&kattr); 4926 + } 4927 + 4928 + fd = get_unused_fd_flags(flags & O_CLOEXEC); 4929 + if (fd < 0) 4930 + return fd; 4931 + 4932 + fd_install(fd, no_free_ptr(file)); 4933 + return fd; 5013 4934 } 5014 4935 5015 4936 int show_path(struct seq_file *m, struct dentry *root) ··· 5076 4915 struct statmount __user *buf; 5077 4916 size_t bufsize; 5078 4917 struct vfsmount *mnt; 4918 + struct mnt_idmap *idmap; 5079 4919 u64 mask; 5080 4920 struct path root; 5081 4921 struct statmount sm; ··· 5346 5184 return 0; 5347 5185 } 5348 5186 5187 + static inline int statmount_mnt_uidmap(struct kstatmount *s, struct seq_file *seq) 5188 + { 5189 + int ret; 5190 + 5191 + ret = statmount_mnt_idmap(s->idmap, seq, true); 5192 + if (ret < 0) 5193 + return ret; 5194 + 5195 + s->sm.mnt_uidmap_num = ret; 5196 + /* 5197 + * Always raise STATMOUNT_MNT_UIDMAP even if there are no valid 5198 + * mappings. This allows userspace to distinguish between a 5199 + * non-idmapped mount and an idmapped mount where none of the 5200 + * individual mappings are valid in the caller's idmapping. 
5201 + */ 5202 + if (is_valid_mnt_idmap(s->idmap)) 5203 + s->sm.mask |= STATMOUNT_MNT_UIDMAP; 5204 + return 0; 5205 + } 5206 + 5207 + static inline int statmount_mnt_gidmap(struct kstatmount *s, struct seq_file *seq) 5208 + { 5209 + int ret; 5210 + 5211 + ret = statmount_mnt_idmap(s->idmap, seq, false); 5212 + if (ret < 0) 5213 + return ret; 5214 + 5215 + s->sm.mnt_gidmap_num = ret; 5216 + /* 5217 + * Always raise STATMOUNT_MNT_GIDMAP even if there are no valid 5218 + * mappings. This allows userspace to distinguish between a 5219 + * non-idmapped mount and an idmapped mount where none of the 5220 + * individual mappings are valid in the caller's idmapping. 5221 + */ 5222 + if (is_valid_mnt_idmap(s->idmap)) 5223 + s->sm.mask |= STATMOUNT_MNT_GIDMAP; 5224 + return 0; 5225 + } 5226 + 5349 5227 static int statmount_string(struct kstatmount *s, u64 flag) 5350 5228 { 5351 5229 int ret = 0; ··· 5432 5230 case STATMOUNT_SB_SOURCE: 5433 5231 offp = &sm->sb_source; 5434 5232 ret = statmount_sb_source(s, seq); 5233 + break; 5234 + case STATMOUNT_MNT_UIDMAP: 5235 + sm->mnt_uidmap = start; 5236 + ret = statmount_mnt_uidmap(s, seq); 5237 + break; 5238 + case STATMOUNT_MNT_GIDMAP: 5239 + sm->mnt_gidmap = start; 5240 + ret = statmount_mnt_gidmap(s, seq); 5435 5241 break; 5436 5242 default: 5437 5243 WARN_ON_ONCE(true); ··· 5533 5323 return 0; 5534 5324 } 5535 5325 5326 + /* This must be updated whenever a new flag is added */ 5327 + #define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \ 5328 + STATMOUNT_MNT_BASIC | \ 5329 + STATMOUNT_PROPAGATE_FROM | \ 5330 + STATMOUNT_MNT_ROOT | \ 5331 + STATMOUNT_MNT_POINT | \ 5332 + STATMOUNT_FS_TYPE | \ 5333 + STATMOUNT_MNT_NS_ID | \ 5334 + STATMOUNT_MNT_OPTS | \ 5335 + STATMOUNT_FS_SUBTYPE | \ 5336 + STATMOUNT_SB_SOURCE | \ 5337 + STATMOUNT_OPT_ARRAY | \ 5338 + STATMOUNT_OPT_SEC_ARRAY | \ 5339 + STATMOUNT_SUPPORTED_MASK) 5340 + 5536 5341 static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, 5537 5342 struct mnt_namespace *ns) 
5538 5343 { ··· 5581 5356 return err; 5582 5357 5583 5358 s->root = root; 5359 + s->idmap = mnt_idmap(s->mnt); 5584 5360 if (s->mask & STATMOUNT_SB_BASIC) 5585 5361 statmount_sb_basic(s); 5586 5362 ··· 5615 5389 if (!err && s->mask & STATMOUNT_SB_SOURCE) 5616 5390 err = statmount_string(s, STATMOUNT_SB_SOURCE); 5617 5391 5392 + if (!err && s->mask & STATMOUNT_MNT_UIDMAP) 5393 + err = statmount_string(s, STATMOUNT_MNT_UIDMAP); 5394 + 5395 + if (!err && s->mask & STATMOUNT_MNT_GIDMAP) 5396 + err = statmount_string(s, STATMOUNT_MNT_GIDMAP); 5397 + 5618 5398 if (!err && s->mask & STATMOUNT_MNT_NS_ID) 5619 5399 statmount_mnt_ns_id(s, ns); 5620 5400 5401 + if (!err && s->mask & STATMOUNT_SUPPORTED_MASK) { 5402 + s->sm.mask |= STATMOUNT_SUPPORTED_MASK; 5403 + s->sm.supported_mask = STATMOUNT_SUPPORTED; 5404 + } 5405 + 5621 5406 if (err) 5622 5407 return err; 5408 + 5409 + /* Are there bits in the return mask not present in STATMOUNT_SUPPORTED? */ 5410 + WARN_ON_ONCE(~STATMOUNT_SUPPORTED & s->sm.mask); 5623 5411 5624 5412 return 0; 5625 5413 } ··· 5652 5412 #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \ 5653 5413 STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \ 5654 5414 STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \ 5655 - STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY) 5415 + STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY | \ 5416 + STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP) 5656 5417 5657 5418 static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, 5658 5419 struct statmount __user *buf, size_t bufsize,

+35 -3

fs/notify/fanotify/fanotify.c

··· 166 166 case FANOTIFY_EVENT_TYPE_FS_ERROR: 167 167 return fanotify_error_event_equal(FANOTIFY_EE(old), 168 168 FANOTIFY_EE(new)); 169 + case FANOTIFY_EVENT_TYPE_MNT: 170 + return false; 169 171 default: 170 172 WARN_ON_ONCE(1); 171 173 } ··· 314 312 pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", 315 313 __func__, iter_info->report_mask, event_mask, data, data_type); 316 314 317 - if (!fid_mode) { 315 + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { 316 + if (data_type != FSNOTIFY_EVENT_MNT) 317 + return 0; 318 + } else if (!fid_mode) { 318 319 /* Do we have path to open a file descriptor? */ 319 320 if (!path) 320 321 return 0; ··· 562 557 return &pevent->fae; 563 558 } 564 559 560 + static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp) 561 + { 562 + struct fanotify_mnt_event *pevent; 563 + 564 + pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp); 565 + if (!pevent) 566 + return NULL; 567 + 568 + pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT; 569 + pevent->mnt_id = mnt_id; 570 + 571 + return &pevent->fae; 572 + } 573 + 565 574 static struct fanotify_event *fanotify_alloc_perm_event(const void *data, 566 575 int data_type, 567 576 gfp_t gfp) ··· 750 731 fid_mode); 751 732 struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); 752 733 const struct path *path = fsnotify_data_path(data, data_type); 734 + u64 mnt_id = fsnotify_data_mnt_id(data, data_type); 753 735 struct mem_cgroup *old_memcg; 754 736 struct dentry *moved = NULL; 755 737 struct inode *child = NULL; ··· 846 826 moved, &hash, gfp); 847 827 } else if (fid_mode) { 848 828 event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); 849 - } else { 829 + } else if (path) { 850 830 event = fanotify_alloc_path_event(path, &hash, gfp); 831 + } else if (mnt_id) { 832 + event = fanotify_alloc_mnt_event(mnt_id, gfp); 833 + } else { 834 + WARN_ON_ONCE(1); 851 835 } 852 836 853 837 if (!event) ··· 951 927 BUILD_BUG_ON(FAN_RENAME != FS_RENAME); 952 928 
BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS); 953 929 954 - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22); 930 + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24); 955 931 956 932 mask = fanotify_group_event_mask(group, iter_info, &match_mask, 957 933 mask, data, data_type, dir); ··· 1052 1028 mempool_free(fee, &group->fanotify_data.error_events_pool); 1053 1029 } 1054 1030 1031 + static void fanotify_free_mnt_event(struct fanotify_event *event) 1032 + { 1033 + kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event)); 1034 + } 1035 + 1055 1036 static void fanotify_free_event(struct fsnotify_group *group, 1056 1037 struct fsnotify_event *fsn_event) 1057 1038 { ··· 1082 1053 break; 1083 1054 case FANOTIFY_EVENT_TYPE_FS_ERROR: 1084 1055 fanotify_free_error_event(group, event); 1056 + break; 1057 + case FANOTIFY_EVENT_TYPE_MNT: 1058 + fanotify_free_mnt_event(event); 1085 1059 break; 1086 1060 default: 1087 1061 WARN_ON_ONCE(1);

+18

fs/notify/fanotify/fanotify.h

··· 9 9 extern struct kmem_cache *fanotify_fid_event_cachep; 10 10 extern struct kmem_cache *fanotify_path_event_cachep; 11 11 extern struct kmem_cache *fanotify_perm_event_cachep; 12 + extern struct kmem_cache *fanotify_mnt_event_cachep; 12 13 13 14 /* Possible states of the permission event */ 14 15 enum { ··· 245 244 FANOTIFY_EVENT_TYPE_PATH_PERM, 246 245 FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */ 247 246 FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */ 247 + FANOTIFY_EVENT_TYPE_MNT, 248 248 __FANOTIFY_EVENT_TYPE_NUM 249 249 }; 250 250 ··· 411 409 struct path path; 412 410 }; 413 411 412 + struct fanotify_mnt_event { 413 + struct fanotify_event fae; 414 + u64 mnt_id; 415 + }; 416 + 414 417 static inline struct fanotify_path_event * 415 418 FANOTIFY_PE(struct fanotify_event *event) 416 419 { 417 420 return container_of(event, struct fanotify_path_event, fae); 421 + } 422 + 423 + static inline struct fanotify_mnt_event * 424 + FANOTIFY_ME(struct fanotify_event *event) 425 + { 426 + return container_of(event, struct fanotify_mnt_event, fae); 418 427 } 419 428 420 429 /* ··· 477 464 static inline bool fanotify_is_error_event(u32 mask) 478 465 { 479 466 return mask & FAN_FS_ERROR; 467 + } 468 + 469 + static inline bool fanotify_is_mnt_event(u32 mask) 470 + { 471 + return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH); 480 472 } 481 473 482 474 static inline const struct path *fanotify_event_path(struct fanotify_event *event)

+75 -14

fs/notify/fanotify/fanotify_user.c

··· 113 113 struct kmem_cache *fanotify_fid_event_cachep __ro_after_init; 114 114 struct kmem_cache *fanotify_path_event_cachep __ro_after_init; 115 115 struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; 116 + struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init; 116 117 117 118 #define FANOTIFY_EVENT_ALIGN 4 118 119 #define FANOTIFY_FID_INFO_HDR_LEN \ ··· 124 123 (sizeof(struct fanotify_event_info_error)) 125 124 #define FANOTIFY_RANGE_INFO_LEN \ 126 125 (sizeof(struct fanotify_event_info_range)) 126 + #define FANOTIFY_MNT_INFO_LEN \ 127 + (sizeof(struct fanotify_event_info_mnt)) 127 128 128 129 static int fanotify_fid_info_len(int fh_len, int name_len) 129 130 { ··· 181 178 fh_len = fanotify_event_object_fh_len(event); 182 179 event_len += fanotify_fid_info_len(fh_len, dot_len); 183 180 } 181 + if (fanotify_is_mnt_event(event->mask)) 182 + event_len += FANOTIFY_MNT_INFO_LEN; 184 183 185 184 if (info_mode & FAN_REPORT_PIDFD) 186 185 event_len += FANOTIFY_PIDFD_INFO_LEN; ··· 408 403 spin_unlock(&group->notification_lock); 409 404 410 405 return -ENOENT; 406 + } 407 + 408 + static size_t copy_mnt_info_to_user(struct fanotify_event *event, 409 + char __user *buf, int count) 410 + { 411 + struct fanotify_event_info_mnt info = { }; 412 + 413 + info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT; 414 + info.hdr.len = FANOTIFY_MNT_INFO_LEN; 415 + 416 + if (WARN_ON(count < info.hdr.len)) 417 + return -EFAULT; 418 + 419 + info.mnt_id = FANOTIFY_ME(event)->mnt_id; 420 + 421 + if (copy_to_user(buf, &info, sizeof(info))) 422 + return -EFAULT; 423 + 424 + return info.hdr.len; 411 425 } 412 426 413 427 static size_t copy_error_info_to_user(struct fanotify_event *event, ··· 717 693 718 694 if (fanotify_event_has_access_range(event)) { 719 695 ret = copy_range_info_to_user(event, buf, count); 696 + if (ret < 0) 697 + return ret; 698 + buf += ret; 699 + count -= ret; 700 + total_bytes += ret; 701 + } 702 + 703 + if (fanotify_is_mnt_event(event->mask)) { 704 + ret = 
copy_mnt_info_to_user(event, buf, count); 720 705 if (ret < 0) 721 706 return ret; 722 707 buf += ret; ··· 1541 1508 if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID)) 1542 1509 return -EINVAL; 1543 1510 1511 + /* Don't allow mixing mnt events with inode events for now */ 1512 + if (flags & FAN_REPORT_MNT) { 1513 + if (class != FAN_CLASS_NOTIF) 1514 + return -EINVAL; 1515 + if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR)) 1516 + return -EINVAL; 1517 + } 1518 + 1544 1519 if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) 1545 1520 return -EINVAL; 1546 1521 ··· 1808 1767 int dfd, const char __user *pathname) 1809 1768 { 1810 1769 struct inode *inode = NULL; 1811 - struct vfsmount *mnt = NULL; 1812 1770 struct fsnotify_group *group; 1813 1771 struct path path; 1814 1772 struct fan_fsid __fsid, *fsid = NULL; ··· 1816 1776 unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; 1817 1777 unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; 1818 1778 unsigned int obj_type, fid_mode; 1819 - void *obj; 1779 + void *obj = NULL; 1820 1780 u32 umask = 0; 1821 1781 int ret; 1822 1782 ··· 1839 1799 break; 1840 1800 case FAN_MARK_FILESYSTEM: 1841 1801 obj_type = FSNOTIFY_OBJ_TYPE_SB; 1802 + break; 1803 + case FAN_MARK_MNTNS: 1804 + obj_type = FSNOTIFY_OBJ_TYPE_MNTNS; 1842 1805 break; 1843 1806 default: 1844 1807 return -EINVAL; ··· 1890 1847 return -EINVAL; 1891 1848 group = fd_file(f)->private_data; 1892 1849 1850 + /* Only report mount events on mnt namespace */ 1851 + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { 1852 + if (mask & ~FANOTIFY_MOUNT_EVENTS) 1853 + return -EINVAL; 1854 + if (mark_type != FAN_MARK_MNTNS) 1855 + return -EINVAL; 1856 + } else { 1857 + if (mask & FANOTIFY_MOUNT_EVENTS) 1858 + return -EINVAL; 1859 + if (mark_type == FAN_MARK_MNTNS) 1860 + return -EINVAL; 1861 + } 1862 + 1893 1863 /* 1894 1864 * An unprivileged user is not allowed to setup mount nor filesystem 1895 1865 * marks. 
This also includes setting up such marks by a group that ··· 1944 1888 * point. 1945 1889 */ 1946 1890 fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); 1947 - if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && 1891 + if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) && 1948 1892 (!fid_mode || mark_type == FAN_MARK_MOUNT)) 1949 1893 return -EINVAL; 1950 1894 ··· 1994 1938 } 1995 1939 1996 1940 /* inode held in place by reference to path; group by fget on fd */ 1997 - if (mark_type == FAN_MARK_INODE) { 1941 + if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { 1998 1942 inode = path.dentry->d_inode; 1999 1943 obj = inode; 2000 - } else { 2001 - mnt = path.mnt; 2002 - if (mark_type == FAN_MARK_MOUNT) 2003 - obj = mnt; 2004 - else 2005 - obj = mnt->mnt_sb; 1944 + } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { 1945 + obj = path.mnt; 1946 + } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { 1947 + obj = path.mnt->mnt_sb; 1948 + } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { 1949 + obj = mnt_ns_from_dentry(path.dentry); 2006 1950 } 1951 + 1952 + ret = -EINVAL; 1953 + if (!obj) 1954 + goto path_put_and_out; 2007 1955 2008 1956 /* 2009 1957 * If some other task has this inode open for write we should not add ··· 2016 1956 */ 2017 1957 if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) && 2018 1958 !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) { 2019 - ret = mnt ? -EINVAL : -EISDIR; 1959 + ret = !inode ? 
-EINVAL : -EISDIR; 2020 1960 /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ 2021 1961 if (ignore == FAN_MARK_IGNORE && 2022 - (mnt || S_ISDIR(inode->i_mode))) 1962 + (!inode || S_ISDIR(inode->i_mode))) 2023 1963 goto path_put_and_out; 2024 1964 2025 1965 ret = 0; ··· 2028 1968 } 2029 1969 2030 1970 /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ 2031 - if (mnt || !S_ISDIR(inode->i_mode)) { 1971 + if (!inode || !S_ISDIR(inode->i_mode)) { 2032 1972 mask &= ~FAN_EVENT_ON_CHILD; 2033 1973 umask = FAN_EVENT_ON_CHILD; 2034 1974 /* ··· 2102 2042 FANOTIFY_DEFAULT_MAX_USER_MARKS); 2103 2043 2104 2044 BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); 2105 - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13); 2045 + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14); 2106 2046 BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); 2107 2047 2108 2048 fanotify_mark_cache = KMEM_CACHE(fanotify_mark, ··· 2115 2055 fanotify_perm_event_cachep = 2116 2056 KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); 2117 2057 } 2058 + fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC); 2118 2059 2119 2060 fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; 2120 2061 init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =

+5

fs/notify/fdinfo.c

··· 121 121 122 122 seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", 123 123 sb->s_dev, mflags, mark->mask, mark->ignore_mask); 124 + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) { 125 + struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector); 126 + 127 + seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n", 128 + mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask); 124 129 } 125 130 } 126 131

+40 -7

fs/notify/fsnotify.c

··· 28 28 fsnotify_clear_marks_by_mount(mnt); 29 29 } 30 30 31 + void __fsnotify_mntns_delete(struct mnt_namespace *mntns) 32 + { 33 + fsnotify_clear_marks_by_mntns(mntns); 34 + } 35 + 31 36 /** 32 37 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. 33 38 * @sb: superblock being unmounted. ··· 425 420 file_name, cookie, iter_info); 426 421 } 427 422 428 - static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) 423 + static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp) 429 424 { 430 425 struct fsnotify_mark_connector *conn; 431 426 struct hlist_node *node = NULL; ··· 543 538 { 544 539 const struct path *path = fsnotify_data_path(data, data_type); 545 540 struct super_block *sb = fsnotify_data_sb(data, data_type); 546 - struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); 541 + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); 542 + struct fsnotify_sb_info *sbinfo = sb ? 
fsnotify_sb_info(sb) : NULL; 547 543 struct fsnotify_iter_info iter_info = {}; 548 544 struct mount *mnt = NULL; 549 545 struct inode *inode2 = NULL; 550 546 struct dentry *moved; 551 547 int inode2_type; 552 548 int ret = 0; 553 - __u32 test_mask, marks_mask; 549 + __u32 test_mask, marks_mask = 0; 554 550 555 551 if (path) 556 552 mnt = real_mount(path->mnt); ··· 584 578 if ((!sbinfo || !sbinfo->sb_marks) && 585 579 (!mnt || !mnt->mnt_fsnotify_marks) && 586 580 (!inode || !inode->i_fsnotify_marks) && 587 - (!inode2 || !inode2->i_fsnotify_marks)) 581 + (!inode2 || !inode2->i_fsnotify_marks) && 582 + (!mnt_data || !mnt_data->ns->n_fsnotify_marks)) 588 583 return 0; 589 584 590 - marks_mask = READ_ONCE(sb->s_fsnotify_mask); 585 + if (sb) 586 + marks_mask |= READ_ONCE(sb->s_fsnotify_mask); 591 587 if (mnt) 592 588 marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask); 593 589 if (inode) 594 590 marks_mask |= READ_ONCE(inode->i_fsnotify_mask); 595 591 if (inode2) 596 592 marks_mask |= READ_ONCE(inode2->i_fsnotify_mask); 597 - 593 + if (mnt_data) 594 + marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask); 598 595 599 596 /* 600 597 * If this is a modify event we may need to clear some ignore masks. ··· 626 617 if (inode2) { 627 618 iter_info.marks[inode2_type] = 628 619 fsnotify_first_mark(&inode2->i_fsnotify_marks); 620 + } 621 + if (mnt_data) { 622 + iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] = 623 + fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks); 629 624 } 630 625 631 626 /* ··· 721 708 } 722 709 #endif 723 710 711 + void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) 712 + { 713 + struct fsnotify_mnt data = { 714 + .ns = ns, 715 + .mnt_id = real_mount(mnt)->mnt_id_unique, 716 + }; 717 + 718 + if (WARN_ON_ONCE(!ns)) 719 + return; 720 + 721 + /* 722 + * This is an optimization as well as making sure fsnotify_init() has 723 + * been called. 
724 + */ 725 + if (!ns->n_fsnotify_marks) 726 + return; 727 + 728 + fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0); 729 + } 730 + 724 731 static __init int fsnotify_init(void) 725 732 { 726 733 int ret; 727 734 728 - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24); 735 + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26); 729 736 730 737 ret = init_srcu_struct(&fsnotify_mark_srcu); 731 738 if (ret)

+11

fs/notify/fsnotify.h

··· 33 33 return conn->obj; 34 34 } 35 35 36 + static inline struct mnt_namespace *fsnotify_conn_mntns( 37 + struct fsnotify_mark_connector *conn) 38 + { 39 + return conn->obj; 40 + } 41 + 36 42 static inline struct super_block *fsnotify_object_sb(void *obj, 37 43 enum fsnotify_obj_type obj_type) 38 44 { ··· 93 87 static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) 94 88 { 95 89 fsnotify_destroy_marks(fsnotify_sb_marks(sb)); 90 + } 91 + 92 + static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns) 93 + { 94 + fsnotify_destroy_marks(&mntns->n_fsnotify_marks); 96 95 } 97 96 98 97 /*

+11 -3

fs/notify/mark.c

··· 107 107 return &real_mount(obj)->mnt_fsnotify_marks; 108 108 case FSNOTIFY_OBJ_TYPE_SB: 109 109 return fsnotify_sb_marks(obj); 110 + case FSNOTIFY_OBJ_TYPE_MNTNS: 111 + return &((struct mnt_namespace *)obj)->n_fsnotify_marks; 110 112 default: 111 113 return NULL; 112 114 } ··· 122 120 return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; 123 121 else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) 124 122 return &fsnotify_conn_sb(conn)->s_fsnotify_mask; 123 + else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) 124 + return &fsnotify_conn_mntns(conn)->n_fsnotify_mask; 125 125 return NULL; 126 126 } 127 127 ··· 350 346 fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; 351 347 } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { 352 348 fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; 349 + } else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) { 350 + fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0; 353 351 } 354 352 355 353 rcu_assign_pointer(*connp, NULL); 356 354 conn->obj = NULL; 357 355 conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; 358 - fsnotify_update_sb_watchers(sb, conn); 356 + if (sb) 357 + fsnotify_update_sb_watchers(sb, conn); 359 358 360 359 return inode; 361 360 } ··· 731 724 * Attach the sb info before attaching a connector to any object on sb. 732 725 * The sb info will remain attached as long as sb lives. 733 726 */ 734 - if (!fsnotify_sb_info(sb)) { 727 + if (sb && !fsnotify_sb_info(sb)) { 735 728 err = fsnotify_attach_info_to_sb(sb); 736 729 if (err) 737 730 return err; ··· 777 770 /* mark should be the last entry. last is the current last entry */ 778 771 hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); 779 772 added: 780 - fsnotify_update_sb_watchers(sb, conn); 773 + if (sb) 774 + fsnotify_update_sb_watchers(sb, conn); 781 775 /* 782 776 * Since connector is attached to object using cmpxchg() we are 783 777 * guaranteed that connector initialization is fully visible by anyone

+3 -1

fs/pnode.c

··· 549 549 mp = parent->mnt_mp; 550 550 parent = parent->mnt_parent; 551 551 } 552 - if (parent != mnt->mnt_parent) 552 + if (parent != mnt->mnt_parent) { 553 553 mnt_change_mountpoint(parent, mp, mnt); 554 + mnt_notify_add(mnt); 555 + } 554 556 } 555 557 } 556 558

+8 -4

include/linux/fanotify.h

··· 25 25 26 26 #define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) 27 27 28 - #define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) 28 + #define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT) 29 29 30 30 /* 31 31 * fanotify_init() flags that require CAP_SYS_ADMIN. ··· 38 38 FAN_REPORT_PIDFD | \ 39 39 FAN_REPORT_FD_ERROR | \ 40 40 FAN_UNLIMITED_QUEUE | \ 41 - FAN_UNLIMITED_MARKS) 41 + FAN_UNLIMITED_MARKS | \ 42 + FAN_REPORT_MNT) 42 43 43 44 /* 44 45 * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. ··· 59 58 #define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV) 60 59 61 60 #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ 62 - FAN_MARK_FILESYSTEM) 61 + FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS) 63 62 64 63 #define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ 65 64 FAN_MARK_FLUSH) ··· 110 109 /* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */ 111 110 #define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR) 112 111 112 + #define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH) 113 + 113 114 /* Events that user can request to be notified on */ 114 115 #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ 115 116 FANOTIFY_INODE_EVENTS | \ 116 - FANOTIFY_ERROR_EVENTS) 117 + FANOTIFY_ERROR_EVENTS | \ 118 + FANOTIFY_MOUNT_EVENTS) 117 119 118 120 /* Extra flags that may be reported with event or control handling of events */ 119 121 #define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR)

+20

include/linux/fsnotify.h

··· 320 320 __fsnotify_vfsmount_delete(mnt); 321 321 } 322 322 323 + static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns) 324 + { 325 + __fsnotify_mntns_delete(mntns); 326 + } 327 + 323 328 /* 324 329 * fsnotify_inoderemove - an inode is going away 325 330 */ ··· 531 526 532 527 return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, 533 528 NULL, NULL, NULL, 0); 529 + } 530 + 531 + static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt) 532 + { 533 + fsnotify_mnt(FS_MNT_ATTACH, ns, mnt); 534 + } 535 + 536 + static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt) 537 + { 538 + fsnotify_mnt(FS_MNT_DETACH, ns, mnt); 539 + } 540 + 541 + static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt) 542 + { 543 + fsnotify_mnt(FS_MNT_MOVE, ns, mnt); 534 544 } 535 545 536 546 #endif /* _LINUX_FS_NOTIFY_H */

+42

include/linux/fsnotify_backend.h

··· 59 59 60 60 #define FS_PRE_ACCESS 0x00100000 /* Pre-content access hook */ 61 61 62 + #define FS_MNT_ATTACH 0x01000000 /* Mount was attached */ 63 + #define FS_MNT_DETACH 0x02000000 /* Mount was detached */ 64 + #define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH) 65 + 62 66 /* 63 67 * Set on inode mark that cares about things that happen to its children. 64 68 * Always set for dnotify and inotify. ··· 83 79 * when a directory entry inside a child subdir changes. 84 80 */ 85 81 #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) 82 + 83 + /* Mount namespace events */ 84 + #define FSNOTIFY_MNT_EVENTS (FS_MNT_ATTACH | FS_MNT_DETACH) 86 85 87 86 /* Content events can be used to inspect file content */ 88 87 #define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \ ··· 115 108 116 109 /* Events that can be reported to backends */ 117 110 #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ 111 + FSNOTIFY_MNT_EVENTS | \ 118 112 FS_EVENTS_POSS_ON_CHILD | \ 119 113 FS_DELETE_SELF | FS_MOVE_SELF | \ 120 114 FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ ··· 306 298 FSNOTIFY_EVENT_PATH, 307 299 FSNOTIFY_EVENT_INODE, 308 300 FSNOTIFY_EVENT_DENTRY, 301 + FSNOTIFY_EVENT_MNT, 309 302 FSNOTIFY_EVENT_ERROR, 310 303 }; 311 304 ··· 326 317 { 327 318 return range->path; 328 319 } 320 + 321 + struct fsnotify_mnt { 322 + const struct mnt_namespace *ns; 323 + u64 mnt_id; 324 + }; 329 325 330 326 static inline struct inode *fsnotify_data_inode(const void *data, int data_type) 331 327 { ··· 397 383 } 398 384 } 399 385 386 + static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data, 387 + int data_type) 388 + { 389 + switch (data_type) { 390 + case FSNOTIFY_EVENT_MNT: 391 + return data; 392 + default: 393 + return NULL; 394 + } 395 + } 396 + 397 + static inline u64 fsnotify_data_mnt_id(const void *data, int data_type) 398 + { 399 + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); 400 + 401 + 
return mnt_data ? mnt_data->mnt_id : 0; 402 + } 403 + 400 404 static inline struct fs_error_report *fsnotify_data_error_report( 401 405 const void *data, 402 406 int data_type) ··· 452 420 FSNOTIFY_ITER_TYPE_SB, 453 421 FSNOTIFY_ITER_TYPE_PARENT, 454 422 FSNOTIFY_ITER_TYPE_INODE2, 423 + FSNOTIFY_ITER_TYPE_MNTNS, 455 424 FSNOTIFY_ITER_TYPE_COUNT 456 425 }; 457 426 ··· 462 429 FSNOTIFY_OBJ_TYPE_INODE, 463 430 FSNOTIFY_OBJ_TYPE_VFSMOUNT, 464 431 FSNOTIFY_OBJ_TYPE_SB, 432 + FSNOTIFY_OBJ_TYPE_MNTNS, 465 433 FSNOTIFY_OBJ_TYPE_COUNT, 466 434 FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT 467 435 }; ··· 647 613 extern void __fsnotify_inode_delete(struct inode *inode); 648 614 extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); 649 615 extern void fsnotify_sb_delete(struct super_block *sb); 616 + extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns); 650 617 extern void fsnotify_sb_free(struct super_block *sb); 651 618 extern u32 fsnotify_get_cookie(void); 619 + extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt); 652 620 653 621 static inline __u32 fsnotify_parent_needed_mask(__u32 mask) 654 622 { ··· 964 928 static inline void fsnotify_sb_delete(struct super_block *sb) 965 929 {} 966 930 931 + static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns) 932 + {} 933 + 967 934 static inline void fsnotify_sb_free(struct super_block *sb) 968 935 {} 969 936 ··· 979 940 } 980 941 981 942 static inline void fsnotify_unmount_inodes(struct super_block *sb) 943 + {} 944 + 945 + static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) 982 946 {} 983 947 984 948 #endif /* CONFIG_FSNOTIFY */

+5

include/linux/mnt_idmapping.h

··· 25 25 static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); 26 26 static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); 27 27 28 + static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap) 29 + { 30 + return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap; 31 + } 32 + 28 33 #ifdef CONFIG_MULTIUSER 29 34 static inline uid_t __vfsuid_val(vfsuid_t uid) 30 35 {

+4

include/linux/syscalls.h

··· 951 951 asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, 952 952 int flags, uint32_t sig); 953 953 asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); 954 + asmlinkage long sys_open_tree_attr(int dfd, const char __user *path, 955 + unsigned flags, 956 + struct mount_attr __user *uattr, 957 + size_t usize); 954 958 asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, 955 959 int to_dfd, const char __user *to_path, 956 960 unsigned int ms_flags);

+6

include/linux/uidgid.h

··· 132 132 133 133 u32 map_id_down(struct uid_gid_map *map, u32 id); 134 134 u32 map_id_up(struct uid_gid_map *map, u32 id); 135 + u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count); 135 136 136 137 #else 137 138 ··· 183 182 } 184 183 185 184 static inline u32 map_id_down(struct uid_gid_map *map, u32 id) 185 + { 186 + return id; 187 + } 188 + 189 + static inline u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count) 186 190 { 187 191 return id; 188 192 }

+3 -1

include/uapi/asm-generic/unistd.h

··· 849 849 __SYSCALL(__NR_listxattrat, sys_listxattrat) 850 850 #define __NR_removexattrat 466 851 851 __SYSCALL(__NR_removexattrat, sys_removexattrat) 852 + #define __NR_open_tree_attr 467 853 + __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) 852 854 853 855 #undef __NR_syscalls 854 - #define __NR_syscalls 467 856 + #define __NR_syscalls 468 855 857 856 858 /* 857 859 * 32 bit systems traditionally used different

+10

include/uapi/linux/fanotify.h

··· 28 28 /* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ 29 29 30 30 #define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ 31 + #define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ 32 + #define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ 31 33 32 34 #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ 33 35 ··· 66 64 #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ 67 65 #define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ 68 66 #define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ 67 + #define FAN_REPORT_MNT 0x00004000 /* Report mount events */ 69 68 70 69 /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ 71 70 #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) ··· 97 94 #define FAN_MARK_INODE 0x00000000 98 95 #define FAN_MARK_MOUNT 0x00000010 99 96 #define FAN_MARK_FILESYSTEM 0x00000100 97 + #define FAN_MARK_MNTNS 0x00000110 100 98 101 99 /* 102 100 * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY ··· 151 147 #define FAN_EVENT_INFO_TYPE_PIDFD 4 152 148 #define FAN_EVENT_INFO_TYPE_ERROR 5 153 149 #define FAN_EVENT_INFO_TYPE_RANGE 6 150 + #define FAN_EVENT_INFO_TYPE_MNT 7 154 151 155 152 /* Special info types for FAN_RENAME */ 156 153 #define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 ··· 203 198 __u32 pad; 204 199 __u64 offset; 205 200 __u64 count; 201 + }; 202 + 203 + struct fanotify_event_info_mnt { 204 + struct fanotify_event_info_header hdr; 205 + __u64 mnt_id; 206 206 }; 207 207 208 208 /*

+9 -1

include/uapi/linux/mount.h

··· 179 179 __u32 opt_array; /* [str] Array of nul terminated fs options */ 180 180 __u32 opt_sec_num; /* Number of security options */ 181 181 __u32 opt_sec_array; /* [str] Array of nul terminated security options */ 182 - __u64 __spare2[46]; 182 + __u64 supported_mask; /* Mask flags that this kernel supports */ 183 + __u32 mnt_uidmap_num; /* Number of uid mappings */ 184 + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from caller's namespace) */ 185 + __u32 mnt_gidmap_num; /* Number of gid mappings */ 186 + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from caller's namespace) */ 187 + __u64 __spare2[43]; 188 char str[]; /* Variable size part containing strings */ 189 }; 190 ··· 222 217 #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ 223 218 #define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ 224 219 #define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ 220 + #define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ 221 + #define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ 222 + #define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ 225 223 226 224 /* 227 225 * Special @mnt_id values that can be passed to listmount

+17 -9

kernel/user_namespace.c

··· 238 238 struct idmap_key { 239 239 bool map_up; /* true -> id from kid; false -> kid from id */ 240 240 u32 id; /* id to find */ 241 - u32 count; /* == 0 unless used with map_id_range_down() */ 241 + u32 count; 242 242 }; 243 243 244 244 /* ··· 343 343 * UID_GID_MAP_MAX_BASE_EXTENTS. 344 344 */ 345 345 static struct uid_gid_extent * 346 - map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id) 346 + map_id_range_up_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) 347 347 { 348 348 unsigned idx; 349 - u32 first, last; 349 + u32 first, last, id2; 350 + 351 + id2 = id + count - 1; 350 352 351 353 /* Find the matching extent */ 352 354 for (idx = 0; idx < extents; idx++) { 353 355 first = map->extent[idx].lower_first; 354 356 last = first + map->extent[idx].count - 1; 355 - if (id >= first && id <= last) 357 + if (id >= first && id <= last && 358 + (id2 >= first && id2 <= last)) 356 359 return &map->extent[idx]; 357 360 } 358 361 return NULL; ··· 366 363 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS. 
367 364 */ 368 365 static struct uid_gid_extent * 369 - map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id) 366 + map_id_range_up_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) 370 367 { 371 368 struct idmap_key key; 372 369 373 370 key.map_up = true; 374 - key.count = 1; 371 + key.count = count; 375 372 key.id = id; 376 373 377 374 return bsearch(&key, map->reverse, extents, 378 375 sizeof(struct uid_gid_extent), cmp_map_id); 379 376 } 380 377 381 - u32 map_id_up(struct uid_gid_map *map, u32 id) 378 + u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count) 382 379 { 383 380 struct uid_gid_extent *extent; 384 381 unsigned extents = map->nr_extents; 385 382 smp_rmb(); 386 383 387 384 if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS) 388 - extent = map_id_up_base(extents, map, id); 385 + extent = map_id_range_up_base(extents, map, id, count); 389 386 else 390 - extent = map_id_up_max(extents, map, id); 387 + extent = map_id_range_up_max(extents, map, id, count); 391 388 392 389 /* Map the id or note failure */ 393 390 if (extent) ··· 396 393 id = (u32) -1; 397 394 398 395 return id; 396 + } 397 + 398 + u32 map_id_up(struct uid_gid_map *map, u32 id) 399 + { 400 + return map_id_range_up(map, id, 1); 399 401 } 400 402 401 403 /**

+13 -1

samples/vfs/samples-vfs.h

··· 42 42 __u32 opt_array; /* [str] Array of nul terminated fs options */ 43 43 __u32 opt_sec_num; /* Number of security options */ 44 44 __u32 opt_sec_array; /* [str] Array of nul terminated security options */ 45 - __u64 __spare2[46]; 45 + __u32 mnt_uidmap_num; /* Number of uid mappings */ 46 + __u32 mnt_uidmap; /* [str] Array of uid mappings */ 47 + __u32 mnt_gidmap_num; /* Number of gid mappings */ 48 + __u32 mnt_gidmap; /* [str] Array of gid mappings */ 49 + __u64 __spare2[44]; 46 50 char str[]; /* Variable size part containing strings */ 47 51 }; 48 52 ··· 160 156 161 157 #ifndef STATX_MNT_ID_UNIQUE 162 158 #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ 159 + #endif 160 + 161 + #ifndef STATMOUNT_MNT_UIDMAP 162 + #define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ 163 + #endif 164 + 165 + #ifndef STATMOUNT_MNT_GIDMAP 166 + #define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ 163 167 #endif 164 168 165 169 #ifndef MOUNT_ATTR_RDONLY

+29 -6

samples/vfs/test-list-all-mounts.c

··· 128 128 STATMOUNT_MNT_POINT | 129 129 STATMOUNT_MNT_NS_ID | 130 130 STATMOUNT_MNT_OPTS | 131 - STATMOUNT_FS_TYPE, 0); 131 + STATMOUNT_FS_TYPE | 132 + STATMOUNT_MNT_UIDMAP | 133 + STATMOUNT_MNT_GIDMAP, 0); 132 134 if (!stmnt) { 133 135 printf("Failed to statmount(%" PRIu64 ") in mount namespace(%" PRIu64 ")\n", 134 136 (uint64_t)last_mnt_id, (uint64_t)info.mnt_ns_id); 135 137 continue; 136 138 } 137 139 138 - printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n\n", 140 + printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n", 139 141 (uint64_t)stmnt->mnt_id, 140 142 (uint64_t)stmnt->mnt_parent_id, 141 - stmnt->str + stmnt->fs_type, 142 - stmnt->str + stmnt->mnt_root, 143 - stmnt->str + stmnt->mnt_point, 144 - stmnt->str + stmnt->mnt_opts); 143 + (stmnt->mask & STATMOUNT_FS_TYPE) ? stmnt->str + stmnt->fs_type : "", 144 + (stmnt->mask & STATMOUNT_MNT_ROOT) ? stmnt->str + stmnt->mnt_root : "", 145 + (stmnt->mask & STATMOUNT_MNT_POINT) ? stmnt->str + stmnt->mnt_point : "", 146 + (stmnt->mask & STATMOUNT_MNT_OPTS) ? stmnt->str + stmnt->mnt_opts : ""); 147 + 148 + if (stmnt->mask & STATMOUNT_MNT_UIDMAP) { 149 + const char *idmap = stmnt->str + stmnt->mnt_uidmap; 150 + 151 + for (size_t idx = 0; idx < stmnt->mnt_uidmap_num; idx++) { 152 + printf("mnt_uidmap[%zu]:\t%s\n", idx, idmap); 153 + idmap += strlen(idmap) + 1; 154 + } 155 + } 156 + 157 + if (stmnt->mask & STATMOUNT_MNT_GIDMAP) { 158 + const char *idmap = stmnt->str + stmnt->mnt_gidmap; 159 + 160 + for (size_t idx = 0; idx < stmnt->mnt_gidmap_num; idx++) { 161 + printf("mnt_gidmap[%zu]:\t%s\n", idx, idmap); 162 + idmap += strlen(idmap) + 1; 163 + } 164 + } 165 + 166 + printf("\n"); 167 + 145 168 free(stmnt); 146 169 } 147 170 }

+1

scripts/syscall.tbl

··· 407 407 464 common getxattrat sys_getxattrat 408 408 465 common listxattrat sys_listxattrat 409 409 466 common removexattrat sys_removexattrat 410 + 467 common open_tree_attr sys_open_tree_attr

+3

security/selinux/hooks.c

··· 3395 3395 case FSNOTIFY_OBJ_TYPE_INODE: 3396 3396 perm = FILE__WATCH; 3397 3397 break; 3398 + case FSNOTIFY_OBJ_TYPE_MNTNS: 3399 + perm = FILE__WATCH_MOUNTNS; 3400 + break; 3398 3401 default: 3399 3402 return -EINVAL; 3400 3403 }

+1 -1

security/selinux/include/classmap.h

··· 8 8 COMMON_FILE_SOCK_PERMS, "unlink", "link", "rename", "execute", \ 9 9 "quotaon", "mounton", "audit_access", "open", "execmod", \ 10 10 "watch", "watch_mount", "watch_sb", "watch_with_perm", \ 11 - "watch_reads" 11 + "watch_reads", "watch_mountns" 12 12 13 13 #define COMMON_SOCK_PERMS \ 14 14 COMMON_FILE_SOCK_PERMS, "bind", "connect", "listen", "accept", \

+1

tools/testing/selftests/Makefile

··· 35 35 TARGETS += filesystems/fat 36 36 TARGETS += filesystems/overlayfs 37 37 TARGETS += filesystems/statmount 38 + TARGETS += filesystems/mount-notify 38 39 TARGETS += firmware 39 40 TARGETS += fpu 40 41 TARGETS += ftrace

+2

tools/testing/selftests/filesystems/mount-notify/.gitignore

··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + /*_test

+6

tools/testing/selftests/filesystems/mount-notify/Makefile

··· 1 + # SPDX-License-Identifier: GPL-2.0-or-later 2 + 3 + CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) 4 + TEST_GEN_PROGS := mount-notify_test 5 + 6 + include ../../lib.mk

+516

tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c

··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> 3 + 4 + #define _GNU_SOURCE 5 + #include <fcntl.h> 6 + #include <sched.h> 7 + #include <stdio.h> 8 + #include <string.h> 9 + #include <sys/stat.h> 10 + #include <sys/mount.h> 11 + #include <linux/fanotify.h> 12 + #include <unistd.h> 13 + #include <sys/fanotify.h> 14 + #include <sys/syscall.h> 15 + 16 + #include "../../kselftest_harness.h" 17 + #include "../statmount/statmount.h" 18 + 19 + #ifndef FAN_MNT_ATTACH 20 + struct fanotify_event_info_mnt { 21 + struct fanotify_event_info_header hdr; 22 + __u64 mnt_id; 23 + }; 24 + #define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ 25 + #endif 26 + 27 + #ifndef FAN_MNT_DETACH 28 + #define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ 29 + #endif 30 + 31 + #ifndef FAN_REPORT_MNT 32 + #define FAN_REPORT_MNT 0x00004000 /* Report mount events */ 33 + #endif 34 + 35 + #ifndef FAN_MARK_MNTNS 36 + #define FAN_MARK_MNTNS 0x00000110 37 + #endif 38 + 39 + static uint64_t get_mnt_id(struct __test_metadata *const _metadata, 40 + const char *path) 41 + { 42 + struct statx sx; 43 + 44 + ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0); 45 + ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE)); 46 + return sx.stx_mnt_id; 47 + } 48 + 49 + static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; 50 + 51 + FIXTURE(fanotify) { 52 + int fan_fd; 53 + char buf[256]; 54 + unsigned int rem; 55 + void *next; 56 + char root_mntpoint[sizeof(root_mntpoint_templ)]; 57 + int orig_root; 58 + int ns_fd; 59 + uint64_t root_id; 60 + }; 61 + 62 + FIXTURE_SETUP(fanotify) 63 + { 64 + int ret; 65 + 66 + ASSERT_EQ(unshare(CLONE_NEWNS), 0); 67 + 68 + self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY); 69 + ASSERT_GE(self->ns_fd, 0); 70 + 71 + ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0); 72 + 73 + strcpy(self->root_mntpoint, root_mntpoint_templ); 74 + 
ASSERT_NE(mkdtemp(self->root_mntpoint), NULL); 75 + 76 + self->orig_root = open("/", O_PATH | O_CLOEXEC); 77 + ASSERT_GE(self->orig_root, 0); 78 + 79 + ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0); 80 + 81 + ASSERT_EQ(chroot(self->root_mntpoint), 0); 82 + 83 + ASSERT_EQ(chdir("/"), 0); 84 + 85 + ASSERT_EQ(mkdir("a", 0700), 0); 86 + 87 + ASSERT_EQ(mkdir("b", 0700), 0); 88 + 89 + self->root_id = get_mnt_id(_metadata, "/"); 90 + ASSERT_NE(self->root_id, 0); 91 + 92 + self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0); 93 + ASSERT_GE(self->fan_fd, 0); 94 + 95 + ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, 96 + FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL); 97 + ASSERT_EQ(ret, 0); 98 + 99 + self->rem = 0; 100 + } 101 + 102 + FIXTURE_TEARDOWN(fanotify) 103 + { 104 + ASSERT_EQ(self->rem, 0); 105 + close(self->fan_fd); 106 + 107 + ASSERT_EQ(fchdir(self->orig_root), 0); 108 + 109 + ASSERT_EQ(chroot("."), 0); 110 + 111 + EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0); 112 + EXPECT_EQ(chdir(self->root_mntpoint), 0); 113 + EXPECT_EQ(chdir("/"), 0); 114 + EXPECT_EQ(rmdir(self->root_mntpoint), 0); 115 + } 116 + 117 + static uint64_t expect_notify(struct __test_metadata *const _metadata, 118 + FIXTURE_DATA(fanotify) *self, 119 + uint64_t *mask) 120 + { 121 + struct fanotify_event_metadata *meta; 122 + struct fanotify_event_info_mnt *mnt; 123 + unsigned int thislen; 124 + 125 + if (!self->rem) { 126 + ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf)); 127 + ASSERT_GT(len, 0); 128 + 129 + self->rem = len; 130 + self->next = (void *) self->buf; 131 + } 132 + 133 + meta = self->next; 134 + ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem)); 135 + 136 + thislen = meta->event_len; 137 + self->rem -= thislen; 138 + self->next += thislen; 139 + 140 + *mask = meta->mask; 141 + thislen -= sizeof(*meta); 142 + 143 + mnt = ((void *) meta) + meta->event_len - thislen; 144 + 145 + ASSERT_EQ(thislen, sizeof(*mnt)); 146 + 147 + return 
mnt->mnt_id; 148 + } 149 + 150 + static void expect_notify_n(struct __test_metadata *const _metadata, 151 + FIXTURE_DATA(fanotify) *self, 152 + unsigned int n, uint64_t mask[], uint64_t mnts[]) 153 + { 154 + unsigned int i; 155 + 156 + for (i = 0; i < n; i++) 157 + mnts[i] = expect_notify(_metadata, self, &mask[i]); 158 + } 159 + 160 + static uint64_t expect_notify_mask(struct __test_metadata *const _metadata, 161 + FIXTURE_DATA(fanotify) *self, 162 + uint64_t expect_mask) 163 + { 164 + uint64_t mntid, mask; 165 + 166 + mntid = expect_notify(_metadata, self, &mask); 167 + ASSERT_EQ(expect_mask, mask); 168 + 169 + return mntid; 170 + } 171 + 172 + 173 + static void expect_notify_mask_n(struct __test_metadata *const _metadata, 174 + FIXTURE_DATA(fanotify) *self, 175 + uint64_t mask, unsigned int n, uint64_t mnts[]) 176 + { 177 + unsigned int i; 178 + 179 + for (i = 0; i < n; i++) 180 + mnts[i] = expect_notify_mask(_metadata, self, mask); 181 + } 182 + 183 + static void verify_mount_ids(struct __test_metadata *const _metadata, 184 + const uint64_t list1[], const uint64_t list2[], 185 + size_t num) 186 + { 187 + unsigned int i, j; 188 + 189 + // Check that neither list has any duplicates 190 + for (i = 0; i < num; i++) { 191 + for (j = 0; j < num; j++) { 192 + if (i != j) { 193 + ASSERT_NE(list1[i], list1[j]); 194 + ASSERT_NE(list2[i], list2[j]); 195 + } 196 + } 197 + } 198 + // Check that all list1 members can be found in list2. Together with 199 + // the above it means that the list1 and list2 represent the same sets. 
200 + for (i = 0; i < num; i++) { 201 + for (j = 0; j < num; j++) { 202 + if (list1[i] == list2[j]) 203 + break; 204 + } 205 + ASSERT_NE(j, num); 206 + } 207 + } 208 + 209 + static void check_mounted(struct __test_metadata *const _metadata, 210 + const uint64_t mnts[], size_t num) 211 + { 212 + ssize_t ret; 213 + uint64_t *list; 214 + 215 + list = malloc((num + 1) * sizeof(list[0])); 216 + ASSERT_NE(list, NULL); 217 + 218 + ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0); 219 + ASSERT_EQ(ret, num); 220 + 221 + verify_mount_ids(_metadata, mnts, list, num); 222 + 223 + free(list); 224 + } 225 + 226 + static void setup_mount_tree(struct __test_metadata *const _metadata, 227 + int log2_num) 228 + { 229 + int ret, i; 230 + 231 + ret = mount("", "/", NULL, MS_SHARED, NULL); 232 + ASSERT_EQ(ret, 0); 233 + 234 + for (i = 0; i < log2_num; i++) { 235 + ret = mount("/", "/", NULL, MS_BIND, NULL); 236 + ASSERT_EQ(ret, 0); 237 + } 238 + } 239 + 240 + TEST_F(fanotify, bind) 241 + { 242 + int ret; 243 + uint64_t mnts[2] = { self->root_id }; 244 + 245 + ret = mount("/", "/", NULL, MS_BIND, NULL); 246 + ASSERT_EQ(ret, 0); 247 + 248 + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 249 + ASSERT_NE(mnts[0], mnts[1]); 250 + 251 + check_mounted(_metadata, mnts, 2); 252 + 253 + // Cleanup 254 + uint64_t detach_id; 255 + ret = umount("/"); 256 + ASSERT_EQ(ret, 0); 257 + 258 + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); 259 + ASSERT_EQ(detach_id, mnts[1]); 260 + 261 + check_mounted(_metadata, mnts, 1); 262 + } 263 + 264 + TEST_F(fanotify, move) 265 + { 266 + int ret; 267 + uint64_t mnts[2] = { self->root_id }; 268 + uint64_t move_id; 269 + 270 + ret = mount("/", "/a", NULL, MS_BIND, NULL); 271 + ASSERT_EQ(ret, 0); 272 + 273 + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 274 + ASSERT_NE(mnts[0], mnts[1]); 275 + 276 + check_mounted(_metadata, mnts, 2); 277 + 278 + ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0); 279 + 
ASSERT_EQ(ret, 0); 280 + 281 + move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH); 282 + ASSERT_EQ(move_id, mnts[1]); 283 + 284 + // Cleanup 285 + ret = umount("/b"); 286 + ASSERT_EQ(ret, 0); 287 + 288 + check_mounted(_metadata, mnts, 1); 289 + } 290 + 291 + TEST_F(fanotify, propagate) 292 + { 293 + const unsigned int log2_num = 4; 294 + const unsigned int num = (1 << log2_num); 295 + uint64_t mnts[num]; 296 + 297 + setup_mount_tree(_metadata, log2_num); 298 + 299 + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1); 300 + 301 + mnts[0] = self->root_id; 302 + check_mounted(_metadata, mnts, num); 303 + 304 + // Cleanup 305 + int ret; 306 + uint64_t mnts2[num]; 307 + ret = umount2("/", MNT_DETACH); 308 + ASSERT_EQ(ret, 0); 309 + 310 + ret = mount("", "/", NULL, MS_PRIVATE, NULL); 311 + ASSERT_EQ(ret, 0); 312 + 313 + mnts2[0] = self->root_id; 314 + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1); 315 + verify_mount_ids(_metadata, mnts, mnts2, num); 316 + 317 + check_mounted(_metadata, mnts, 1); 318 + } 319 + 320 + TEST_F(fanotify, fsmount) 321 + { 322 + int ret, fs, mnt; 323 + uint64_t mnts[2] = { self->root_id }; 324 + 325 + fs = fsopen("tmpfs", 0); 326 + ASSERT_GE(fs, 0); 327 + 328 + ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0); 329 + ASSERT_EQ(ret, 0); 330 + 331 + mnt = fsmount(fs, 0, 0); 332 + ASSERT_GE(mnt, 0); 333 + 334 + close(fs); 335 + 336 + ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH); 337 + ASSERT_EQ(ret, 0); 338 + 339 + close(mnt); 340 + 341 + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 342 + ASSERT_NE(mnts[0], mnts[1]); 343 + 344 + check_mounted(_metadata, mnts, 2); 345 + 346 + // Cleanup 347 + uint64_t detach_id; 348 + ret = umount("/a"); 349 + ASSERT_EQ(ret, 0); 350 + 351 + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); 352 + ASSERT_EQ(detach_id, mnts[1]); 353 + 354 + check_mounted(_metadata, mnts, 1); 355 + } 356 + 
357 + TEST_F(fanotify, reparent) 358 + { 359 + uint64_t mnts[6] = { self->root_id }; 360 + uint64_t dmnts[3]; 361 + uint64_t masks[3]; 362 + unsigned int i; 363 + int ret; 364 + 365 + // Create setup with a[1] -> b[2] propagation 366 + ret = mount("/", "/a", NULL, MS_BIND, NULL); 367 + ASSERT_EQ(ret, 0); 368 + 369 + ret = mount("", "/a", NULL, MS_SHARED, NULL); 370 + ASSERT_EQ(ret, 0); 371 + 372 + ret = mount("/a", "/b", NULL, MS_BIND, NULL); 373 + ASSERT_EQ(ret, 0); 374 + 375 + ret = mount("", "/b", NULL, MS_SLAVE, NULL); 376 + ASSERT_EQ(ret, 0); 377 + 378 + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); 379 + 380 + check_mounted(_metadata, mnts, 3); 381 + 382 + // Mount on a[3], which is propagated to b[4] 383 + ret = mount("/", "/a", NULL, MS_BIND, NULL); 384 + ASSERT_EQ(ret, 0); 385 + 386 + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3); 387 + 388 + check_mounted(_metadata, mnts, 5); 389 + 390 + // Mount on b[5], not propagated 391 + ret = mount("/", "/b", NULL, MS_BIND, NULL); 392 + ASSERT_EQ(ret, 0); 393 + 394 + mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 395 + 396 + check_mounted(_metadata, mnts, 6); 397 + 398 + // Umount a[3], which is propagated to b[4], but not b[5] 399 + // This will result in b[5] "falling" on b[2] 400 + ret = umount("/a"); 401 + ASSERT_EQ(ret, 0); 402 + 403 + expect_notify_n(_metadata, self, 3, masks, dmnts); 404 + verify_mount_ids(_metadata, mnts + 3, dmnts, 3); 405 + 406 + for (i = 0; i < 3; i++) { 407 + if (dmnts[i] == mnts[5]) { 408 + ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH); 409 + } else { 410 + ASSERT_EQ(masks[i], FAN_MNT_DETACH); 411 + } 412 + } 413 + 414 + mnts[3] = mnts[5]; 415 + check_mounted(_metadata, mnts, 4); 416 + 417 + // Cleanup 418 + ret = umount("/b"); 419 + ASSERT_EQ(ret, 0); 420 + 421 + ret = umount("/a"); 422 + ASSERT_EQ(ret, 0); 423 + 424 + ret = umount("/b"); 425 + ASSERT_EQ(ret, 0); 426 + 427 + expect_notify_mask_n(_metadata, self, 
FAN_MNT_DETACH, 3, dmnts); 428 + verify_mount_ids(_metadata, mnts + 1, dmnts, 3); 429 + 430 + check_mounted(_metadata, mnts, 1); 431 + } 432 + 433 + TEST_F(fanotify, rmdir) 434 + { 435 + uint64_t mnts[3] = { self->root_id }; 436 + int ret; 437 + 438 + ret = mount("/", "/a", NULL, MS_BIND, NULL); 439 + ASSERT_EQ(ret, 0); 440 + 441 + ret = mount("/", "/a/b", NULL, MS_BIND, NULL); 442 + ASSERT_EQ(ret, 0); 443 + 444 + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); 445 + 446 + check_mounted(_metadata, mnts, 3); 447 + 448 + ret = chdir("/a"); 449 + ASSERT_EQ(ret, 0); 450 + 451 + ret = fork(); 452 + ASSERT_GE(ret, 0); 453 + 454 + if (ret == 0) { 455 + chdir("/"); 456 + unshare(CLONE_NEWNS); 457 + mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); 458 + umount2("/a", MNT_DETACH); 459 + // This triggers a detach in the other namespace 460 + rmdir("/a"); 461 + exit(0); 462 + } 463 + wait(NULL); 464 + 465 + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1); 466 + check_mounted(_metadata, mnts, 1); 467 + 468 + // Cleanup 469 + ret = chdir("/"); 470 + ASSERT_EQ(ret, 0); 471 + } 472 + 473 + TEST_F(fanotify, pivot_root) 474 + { 475 + uint64_t mnts[3] = { self->root_id }; 476 + uint64_t mnts2[3]; 477 + int ret; 478 + 479 + ret = mount("tmpfs", "/a", "tmpfs", 0, NULL); 480 + ASSERT_EQ(ret, 0); 481 + 482 + mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 483 + 484 + ret = mkdir("/a/new", 0700); 485 + ASSERT_EQ(ret, 0); 486 + 487 + ret = mkdir("/a/old", 0700); 488 + ASSERT_EQ(ret, 0); 489 + 490 + ret = mount("/a", "/a/new", NULL, MS_BIND, NULL); 491 + ASSERT_EQ(ret, 0); 492 + 493 + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); 494 + check_mounted(_metadata, mnts, 3); 495 + 496 + ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old"); 497 + ASSERT_EQ(ret, 0); 498 + 499 + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2); 500 + verify_mount_ids(_metadata, mnts, mnts2, 2); 501 + 
check_mounted(_metadata, mnts, 3); 502 + 503 + // Cleanup 504 + ret = syscall(SYS_pivot_root, "/old", "/old/a/new"); 505 + ASSERT_EQ(ret, 0); 506 + 507 + ret = umount("/a/new"); 508 + ASSERT_EQ(ret, 0); 509 + 510 + ret = umount("/a"); 511 + ASSERT_EQ(ret, 0); 512 + 513 + check_mounted(_metadata, mnts, 1); 514 + } 515 + 516 + TEST_HARNESS_MAIN

+195

tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c

··· 20 20 FIXTURE_SETUP(set_layers_via_fds) 21 21 { 22 22 ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0); 23 + ASSERT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0); 23 24 } 24 25 25 26 FIXTURE_TEARDOWN(set_layers_via_fds) 26 27 { 27 28 umount2("/set_layers_via_fds", 0); 28 29 ASSERT_EQ(rmdir("/set_layers_via_fds"), 0); 30 + 31 + umount2("/set_layers_via_fds_tmpfs", 0); 32 + ASSERT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0); 29 33 } 30 34 31 35 TEST_F(set_layers_via_fds, set_layers_via_fds) ··· 216 212 ASSERT_GE(fd_overlay, 0); 217 213 ASSERT_EQ(close(fd_context), 0); 218 214 ASSERT_EQ(close(fd_overlay), 0); 215 + } 216 + 217 + TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds) 218 + { 219 + int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower; 220 + int layer_fds[500] = { [0 ... 499] = -EBADF }; 221 + 222 + ASSERT_EQ(unshare(CLONE_NEWNS), 0); 223 + ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); 224 + 225 + fd_context = sys_fsopen("tmpfs", 0); 226 + ASSERT_GE(fd_context, 0); 227 + 228 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); 229 + fd_tmpfs = sys_fsmount(fd_context, 0, 0); 230 + ASSERT_GE(fd_tmpfs, 0); 231 + ASSERT_EQ(close(fd_context), 0); 232 + 233 + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { 234 + char path[100]; 235 + 236 + sprintf(path, "l%d", i); 237 + ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0); 238 + layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY | O_PATH); 239 + ASSERT_GE(layer_fds[i], 0); 240 + } 241 + 242 + ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0); 243 + fd_work = openat(fd_tmpfs, "w", O_DIRECTORY | O_PATH); 244 + ASSERT_GE(fd_work, 0); 245 + 246 + ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0); 247 + fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY | O_PATH); 248 + ASSERT_GE(fd_upper, 0); 249 + 250 + ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0); 251 + fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY | O_PATH); 252 + ASSERT_GE(fd_lower, 0); 253 + 254 + 
ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0); 255 + ASSERT_EQ(close(fd_tmpfs), 0); 256 + 257 + fd_context = sys_fsopen("overlay", 0); 258 + ASSERT_GE(fd_context, 0); 259 + 260 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, fd_work), 0); 261 + ASSERT_EQ(close(fd_work), 0); 262 + 263 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, fd_upper), 0); 264 + ASSERT_EQ(close(fd_upper), 0); 265 + 266 + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { 267 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0); 268 + ASSERT_EQ(close(layer_fds[i]), 0); 269 + } 270 + 271 + ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0); 272 + ASSERT_EQ(close(fd_lower), 0); 273 + 274 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); 275 + 276 + fd_overlay = sys_fsmount(fd_context, 0, 0); 277 + ASSERT_GE(fd_overlay, 0); 278 + ASSERT_EQ(close(fd_context), 0); 279 + ASSERT_EQ(close(fd_overlay), 0); 280 + } 281 + 282 + TEST_F(set_layers_via_fds, set_layers_via_detached_mount_fds) 283 + { 284 + int fd_context, fd_tmpfs, fd_overlay, fd_tmp; 285 + int layer_fds[] = { [0 ... 8] = -EBADF }; 286 + bool layers_found[] = { [0 ... 
8] = false }; 287 + size_t len = 0; 288 + char *line = NULL; 289 + FILE *f_mountinfo; 290 + 291 + ASSERT_EQ(unshare(CLONE_NEWNS), 0); 292 + ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); 293 + 294 + fd_context = sys_fsopen("tmpfs", 0); 295 + ASSERT_GE(fd_context, 0); 296 + 297 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); 298 + fd_tmpfs = sys_fsmount(fd_context, 0, 0); 299 + ASSERT_GE(fd_tmpfs, 0); 300 + ASSERT_EQ(close(fd_context), 0); 301 + 302 + ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0); 303 + ASSERT_EQ(mkdirat(fd_tmpfs, "u/upper", 0755), 0); 304 + ASSERT_EQ(mkdirat(fd_tmpfs, "u/work", 0755), 0); 305 + ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0); 306 + ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0); 307 + ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0); 308 + ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0); 309 + ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0); 310 + ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0); 311 + ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0); 312 + 313 + ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/set_layers_via_fds_tmpfs", MOVE_MOUNT_F_EMPTY_PATH), 0); 314 + 315 + fd_tmp = open_tree(fd_tmpfs, "u", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 316 + ASSERT_GE(fd_tmp, 0); 317 + 318 + layer_fds[0] = openat(fd_tmp, "upper", O_CLOEXEC | O_DIRECTORY | O_PATH); 319 + ASSERT_GE(layer_fds[0], 0); 320 + 321 + layer_fds[1] = openat(fd_tmp, "work", O_CLOEXEC | O_DIRECTORY | O_PATH); 322 + ASSERT_GE(layer_fds[1], 0); 323 + 324 + layer_fds[2] = open_tree(fd_tmpfs, "l1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 325 + ASSERT_GE(layer_fds[2], 0); 326 + 327 + layer_fds[3] = open_tree(fd_tmpfs, "l2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 328 + ASSERT_GE(layer_fds[3], 0); 329 + 330 + layer_fds[4] = open_tree(fd_tmpfs, "l3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 331 + ASSERT_GE(layer_fds[4], 0); 332 + 333 + layer_fds[5] = open_tree(fd_tmpfs, "l4", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 334 + ASSERT_GE(layer_fds[5], 0); 335 + 336 + 
layer_fds[6] = open_tree(fd_tmpfs, "d1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 337 + ASSERT_GE(layer_fds[6], 0); 338 + 339 + layer_fds[7] = open_tree(fd_tmpfs, "d2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 340 + ASSERT_GE(layer_fds[7], 0); 341 + 342 + layer_fds[8] = open_tree(fd_tmpfs, "d3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 343 + ASSERT_GE(layer_fds[8], 0); 344 + 345 + ASSERT_EQ(close(fd_tmpfs), 0); 346 + 347 + fd_context = sys_fsopen("overlay", 0); 348 + ASSERT_GE(fd_context, 0); 349 + 350 + ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0); 351 + 352 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[0]), 0); 353 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[1]), 0); 354 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0); 355 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0); 356 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0); 357 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0); 358 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[6]), 0); 359 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[7]), 0); 360 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[8]), 0); 361 + 362 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0); 363 + 364 + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); 365 + 366 + fd_overlay = sys_fsmount(fd_context, 0, 0); 367 + ASSERT_GE(fd_overlay, 0); 368 + 369 + ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0); 370 + 371 + f_mountinfo = fopen("/proc/self/mountinfo", "r"); 372 + ASSERT_NE(f_mountinfo, NULL); 373 + 374 + while (getline(&line, &len, f_mountinfo) != -1) { 
375 + char *haystack = line; 376 + 377 + if (strstr(haystack, "workdir=/tmp/w")) 378 + layers_found[0] = true; 379 + if (strstr(haystack, "upperdir=/tmp/u")) 380 + layers_found[1] = true; 381 + if (strstr(haystack, "lowerdir+=/tmp/l1")) 382 + layers_found[2] = true; 383 + if (strstr(haystack, "lowerdir+=/tmp/l2")) 384 + layers_found[3] = true; 385 + if (strstr(haystack, "lowerdir+=/tmp/l3")) 386 + layers_found[4] = true; 387 + if (strstr(haystack, "lowerdir+=/tmp/l4")) 388 + layers_found[5] = true; 389 + if (strstr(haystack, "datadir+=/tmp/d1")) 390 + layers_found[6] = true; 391 + if (strstr(haystack, "datadir+=/tmp/d2")) 392 + layers_found[7] = true; 393 + if (strstr(haystack, "datadir+=/tmp/d3")) 394 + layers_found[8] = true; 395 + } 396 + free(line); 397 + 398 + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { 399 + ASSERT_EQ(layers_found[i], true); 400 + ASSERT_EQ(close(layer_fds[i]), 0); 401 + } 402 + 403 + ASSERT_EQ(close(fd_context), 0); 404 + ASSERT_EQ(close(fd_overlay), 0); 405 + ASSERT_EQ(fclose(f_mountinfo), 0); 219 406 } 220 407 221 408 TEST_HARNESS_MAIN

+17

tools/testing/selftests/filesystems/overlayfs/wrappers.h

··· 44 44 to_pathname, flags); 45 45 } 46 46 47 + #ifndef OPEN_TREE_CLONE 48 + #define OPEN_TREE_CLONE 1 49 + #endif 50 + 51 + #ifndef OPEN_TREE_CLOEXEC 52 + #define OPEN_TREE_CLOEXEC O_CLOEXEC 53 + #endif 54 + 55 + #ifndef AT_RECURSIVE 56 + #define AT_RECURSIVE 0x8000 57 + #endif 58 + 59 + static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) 60 + { 61 + return syscall(__NR_open_tree, dfd, filename, flags); 62 + } 63 + 47 64 #endif

+1 -1

tools/testing/selftests/filesystems/statmount/statmount.h

··· 25 25 return syscall(__NR_statmount, &req, buf, bufsize, flags); 26 26 } 27 27 28 - static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id, 28 + static inline ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id, 29 29 uint64_t last_mnt_id, uint64_t list[], size_t num, 30 30 unsigned int flags) 31 31 {

Configure Feed

Configure Feed