Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge patch series "namespace: allow creating empty mount namespaces"

Christian Brauner <brauner@kernel.org> says:

Currently, creating a new mount namespace always copies the entire mount
tree from the caller's namespace. For containers and sandboxes that
intend to build their mount table from scratch this is wasteful: they
inherit a potentially large mount tree only to immediately tear it down.

This series adds support for creating a mount namespace that contains
only a clone of the root mount, with none of the child mounts. Two new
flags are introduced:

- CLONE_EMPTY_MNTNS (1ULL << 37, i.e. 0x2000000000) for clone3(), using the
64-bit flag space.
- UNSHARE_EMPTY_MNTNS (0x00100000) for unshare(), reusing the
CLONE_PARENT_SETTID bit which has no meaning for unshare.

Both flags imply CLONE_NEWNS. The resulting namespace contains a single
nullfs root mount with an immutable empty directory. The intended
workflow is to then mount a real filesystem (e.g., tmpfs) over the root
and build the mount table from there.

* patches from https://patch.msgid.link/20260306-work-empty-mntns-consolidated-v1-0-6eb30529bbb0@kernel.org:
selftests/filesystems: add clone3 tests for empty mount namespaces
selftests/filesystems: add tests for empty mount namespaces
namespace: allow creating empty mount namespaces

Link: https://patch.msgid.link/20260306-work-empty-mntns-consolidated-v1-0-6eb30529bbb0@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>

+2053 -39
+57 -30
fs/namespace.c
··· 4233 4233 struct user_namespace *user_ns, struct fs_struct *new_fs) 4234 4234 { 4235 4235 struct mnt_namespace *new_ns; 4236 - struct vfsmount *rootmnt __free(mntput) = NULL; 4237 - struct vfsmount *pwdmnt __free(mntput) = NULL; 4236 + struct path old_root __free(path_put) = {}; 4237 + struct path old_pwd __free(path_put) = {}; 4238 4238 struct mount *p, *q; 4239 4239 struct mount *old; 4240 4240 struct mount *new; ··· 4254 4254 return new_ns; 4255 4255 4256 4256 guard(namespace_excl)(); 4257 - /* First pass: copy the tree topology */ 4258 - copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; 4257 + 4258 + if (flags & CLONE_EMPTY_MNTNS) 4259 + copy_flags = 0; 4260 + else 4261 + copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; 4259 4262 if (user_ns != ns->user_ns) 4260 4263 copy_flags |= CL_SLAVE; 4261 - new = copy_tree(old, old->mnt.mnt_root, copy_flags); 4264 + 4265 + if (flags & CLONE_EMPTY_MNTNS) 4266 + new = clone_mnt(old, old->mnt.mnt_root, copy_flags); 4267 + else 4268 + new = copy_tree(old, old->mnt.mnt_root, copy_flags); 4262 4269 if (IS_ERR(new)) { 4263 4270 emptied_ns = new_ns; 4264 4271 return ERR_CAST(new); ··· 4276 4269 } 4277 4270 new_ns->root = new; 4278 4271 4279 - /* 4280 - * Second pass: switch the tsk->fs->* elements and mark new vfsmounts 4281 - * as belonging to new namespace. We have already acquired a private 4282 - * fs_struct, so tsk->fs->lock is not needed. 4283 - */ 4284 - p = old; 4285 - q = new; 4286 - while (p) { 4287 - mnt_add_to_ns(new_ns, q); 4288 - new_ns->nr_mounts++; 4272 + if (flags & CLONE_EMPTY_MNTNS) { 4273 + /* 4274 + * Empty mount namespace: only the root mount exists. 4275 + * Reset root and pwd to the cloned mount's root dentry. 
4276 + */ 4289 4277 if (new_fs) { 4290 - if (&p->mnt == new_fs->root.mnt) { 4291 - new_fs->root.mnt = mntget(&q->mnt); 4292 - rootmnt = &p->mnt; 4293 - } 4294 - if (&p->mnt == new_fs->pwd.mnt) { 4295 - new_fs->pwd.mnt = mntget(&q->mnt); 4296 - pwdmnt = &p->mnt; 4297 - } 4278 + old_root = new_fs->root; 4279 + old_pwd = new_fs->pwd; 4280 + 4281 + new_fs->root.mnt = mntget(&new->mnt); 4282 + new_fs->root.dentry = dget(new->mnt.mnt_root); 4283 + 4284 + new_fs->pwd.mnt = mntget(&new->mnt); 4285 + new_fs->pwd.dentry = dget(new->mnt.mnt_root); 4298 4286 } 4299 - p = next_mnt(p, old); 4300 - q = next_mnt(q, new); 4301 - if (!q) 4302 - break; 4303 - // an mntns binding we'd skipped? 4304 - while (p->mnt.mnt_root != q->mnt.mnt_root) 4305 - p = next_mnt(skip_mnt_tree(p), old); 4287 + mnt_add_to_ns(new_ns, new); 4288 + new_ns->nr_mounts++; 4289 + } else { 4290 + /* 4291 + * Full copy: walk old and new trees in parallel, switching 4292 + * the tsk->fs->* elements and marking new vfsmounts as 4293 + * belonging to new namespace. We have already acquired a 4294 + * private fs_struct, so tsk->fs->lock is not needed. 4295 + */ 4296 + p = old; 4297 + q = new; 4298 + while (p) { 4299 + mnt_add_to_ns(new_ns, q); 4300 + new_ns->nr_mounts++; 4301 + if (new_fs) { 4302 + if (&p->mnt == new_fs->root.mnt) { 4303 + old_root.mnt = new_fs->root.mnt; 4304 + new_fs->root.mnt = mntget(&q->mnt); 4305 + } 4306 + if (&p->mnt == new_fs->pwd.mnt) { 4307 + old_pwd.mnt = new_fs->pwd.mnt; 4308 + new_fs->pwd.mnt = mntget(&q->mnt); 4309 + } 4310 + } 4311 + p = next_mnt(p, old); 4312 + q = next_mnt(q, new); 4313 + if (!q) 4314 + break; 4315 + // an mntns binding we'd skipped? 4316 + while (p->mnt.mnt_root != q->mnt.mnt_root) 4317 + p = next_mnt(skip_mnt_tree(p), old); 4318 + } 4306 4319 } 4307 4320 ns_tree_add_raw(new_ns); 4308 4321 return new_ns;
+7
include/uapi/linux/sched.h
··· 36 36 /* Flags for the clone3() syscall. */ 37 37 #define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */ 38 38 #define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ 39 + #define CLONE_EMPTY_MNTNS (1ULL << 37) /* Create an empty mount namespace. */ 39 40 40 41 /* 41 42 * cloning flags intersect with CSIGNAL so can be used with unshare and clone3 42 43 * syscalls only: 43 44 */ 44 45 #define CLONE_NEWTIME 0x00000080 /* New time namespace */ 46 + 47 + /* 48 + * unshare flags share the bit space with clone flags but only apply to the 49 + * unshare syscall: 50 + */ 51 + #define UNSHARE_EMPTY_MNTNS 0x00100000 /* Unshare an empty mount namespace. */ 45 52 46 53 #ifndef __ASSEMBLY__ 47 54 /**
+15 -2
kernel/fork.c
··· 2620 2620 pid_t nr; 2621 2621 2622 2622 /* 2623 + * Creating an empty mount namespace implies creating a new mount 2624 + * namespace. Set this before copy_process() so that the 2625 + * CLONE_NEWNS|CLONE_FS mutual exclusion check works correctly. 2626 + */ 2627 + if (clone_flags & CLONE_EMPTY_MNTNS) { 2628 + clone_flags |= CLONE_NEWNS; 2629 + args->flags = clone_flags; 2630 + } 2631 + 2632 + /* 2623 2633 * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument 2624 2634 * to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are 2625 2635 * mutually exclusive. With clone3() CLONE_PIDFD has grown a separate ··· 2907 2897 { 2908 2898 /* Verify that no unknown flags are passed along. */ 2909 2899 if (kargs->flags & 2910 - ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP)) 2900 + ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | 2901 + CLONE_INTO_CGROUP | CLONE_EMPTY_MNTNS)) 2911 2902 return false; 2912 2903 2913 2904 /* ··· 3061 3050 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| 3062 3051 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| 3063 3052 CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP| 3064 - CLONE_NEWTIME)) 3053 + CLONE_NEWTIME | UNSHARE_EMPTY_MNTNS)) 3065 3054 return -EINVAL; 3066 3055 /* 3067 3056 * Not implemented, but pretend it works if there is nothing ··· 3160 3149 /* 3161 3150 * If unsharing namespace, must also unshare filesystem information. 3162 3151 */ 3152 + if (unshare_flags & UNSHARE_EMPTY_MNTNS) 3153 + unshare_flags |= CLONE_NEWNS; 3163 3154 if (unshare_flags & CLONE_NEWNS) 3164 3155 unshare_flags |= CLONE_FS; 3165 3156
+16 -5
kernel/nsproxy.c
··· 95 95 if (!new_nsp) 96 96 return ERR_PTR(-ENOMEM); 97 97 98 - new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs); 98 + new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, 99 + user_ns, new_fs); 99 100 if (IS_ERR(new_nsp->mnt_ns)) { 100 101 err = PTR_ERR(new_nsp->mnt_ns); 101 102 goto out_ns; ··· 213 212 struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs) 214 213 { 215 214 struct user_namespace *user_ns; 215 + u64 flags = unshare_flags; 216 216 int err = 0; 217 217 218 - if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 219 - CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP | 220 - CLONE_NEWTIME))) 218 + if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 219 + CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP | 220 + CLONE_NEWTIME))) 221 221 return 0; 222 222 223 223 user_ns = new_cred ? new_cred->user_ns : current_user_ns(); 224 224 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 225 225 return -EPERM; 226 226 227 - *new_nsp = create_new_namespaces(unshare_flags, current, user_ns, 227 + /* 228 + * Convert the 32-bit UNSHARE_EMPTY_MNTNS (which aliases 229 + * CLONE_PARENT_SETTID) to the unique 64-bit CLONE_EMPTY_MNTNS. 230 + */ 231 + if (flags & UNSHARE_EMPTY_MNTNS) { 232 + flags &= ~(u64)UNSHARE_EMPTY_MNTNS; 233 + flags |= CLONE_EMPTY_MNTNS; 234 + } 235 + 236 + *new_nsp = create_new_namespaces(flags, current, user_ns, 228 237 new_fs ? new_fs : current->fs); 229 238 if (IS_ERR(*new_nsp)) { 230 239 err = PTR_ERR(*new_nsp);
+4
tools/testing/selftests/filesystems/empty_mntns/.gitignore
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + clone3_empty_mntns_test 3 + empty_mntns_test 4 + overmount_chroot_test
+12
tools/testing/selftests/filesystems/empty_mntns/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0-or-later 2 + 3 + CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) 4 + LDLIBS += -lcap 5 + 6 + TEST_GEN_PROGS := empty_mntns_test overmount_chroot_test clone3_empty_mntns_test 7 + 8 + include ../../lib.mk 9 + 10 + $(OUTPUT)/empty_mntns_test: ../utils.c 11 + $(OUTPUT)/overmount_chroot_test: ../utils.c 12 + $(OUTPUT)/clone3_empty_mntns_test: ../utils.c
+938
tools/testing/selftests/filesystems/empty_mntns/clone3_empty_mntns_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Tests for empty mount namespace creation via clone3() CLONE_EMPTY_MNTNS 4 + * 5 + * These tests exercise the clone3() code path for creating empty mount 6 + * namespaces, which is distinct from the unshare() path tested in 7 + * empty_mntns_test.c. With clone3(), CLONE_EMPTY_MNTNS (0x400000000ULL) 8 + * is a 64-bit flag that implies CLONE_NEWNS. The implication happens in 9 + * kernel_clone() before copy_process(), unlike unshare() where it goes 10 + * through UNSHARE_EMPTY_MNTNS -> CLONE_EMPTY_MNTNS conversion in 11 + * unshare_nsproxy_namespaces(). 12 + * 13 + * Copyright (c) 2024 Christian Brauner <brauner@kernel.org> 14 + */ 15 + 16 + #define _GNU_SOURCE 17 + #include <fcntl.h> 18 + #include <linux/mount.h> 19 + #include <linux/stat.h> 20 + #include <stdio.h> 21 + #include <string.h> 22 + #include <sys/mount.h> 23 + #include <sys/stat.h> 24 + #include <sys/types.h> 25 + #include <unistd.h> 26 + 27 + #include "../utils.h" 28 + #include "../wrappers.h" 29 + #include "clone3/clone3_selftests.h" 30 + #include "empty_mntns.h" 31 + #include "kselftest_harness.h" 32 + 33 + static pid_t clone3_empty_mntns(uint64_t extra_flags) 34 + { 35 + struct __clone_args args = { 36 + .flags = CLONE_EMPTY_MNTNS | extra_flags, 37 + .exit_signal = SIGCHLD, 38 + }; 39 + 40 + return sys_clone3(&args, sizeof(args)); 41 + } 42 + 43 + static bool clone3_empty_mntns_supported(void) 44 + { 45 + pid_t pid; 46 + int status; 47 + 48 + pid = fork(); 49 + if (pid < 0) 50 + return false; 51 + 52 + if (pid == 0) { 53 + if (enter_userns()) 54 + _exit(1); 55 + 56 + pid = clone3_empty_mntns(0); 57 + if (pid < 0) 58 + _exit(1); 59 + 60 + if (pid == 0) 61 + _exit(0); 62 + 63 + _exit(wait_for_pid(pid) != 0); 64 + } 65 + 66 + if (waitpid(pid, &status, 0) != pid) 67 + return false; 68 + 69 + if (!WIFEXITED(status)) 70 + return false; 71 + 72 + return WEXITSTATUS(status) == 0; 73 + } 74 + 75 + FIXTURE(clone3_empty_mntns) {}; 76 + 77 + 
FIXTURE_SETUP(clone3_empty_mntns) 78 + { 79 + if (!clone3_empty_mntns_supported()) 80 + SKIP(return, "CLONE_EMPTY_MNTNS via clone3 not supported"); 81 + } 82 + 83 + FIXTURE_TEARDOWN(clone3_empty_mntns) {} 84 + 85 + /* 86 + * Basic clone3() with CLONE_EMPTY_MNTNS: child gets empty mount namespace 87 + * with exactly 1 mount and root == cwd. 88 + */ 89 + TEST_F(clone3_empty_mntns, basic) 90 + { 91 + pid_t pid, inner; 92 + 93 + pid = fork(); 94 + ASSERT_GE(pid, 0); 95 + 96 + if (pid == 0) { 97 + if (enter_userns()) 98 + _exit(1); 99 + 100 + inner = clone3_empty_mntns(0); 101 + if (inner < 0) 102 + _exit(2); 103 + 104 + if (inner == 0) { 105 + uint64_t root_id, cwd_id; 106 + 107 + if (count_mounts() != 1) 108 + _exit(3); 109 + 110 + root_id = get_unique_mnt_id("/"); 111 + cwd_id = get_unique_mnt_id("."); 112 + if (root_id == 0 || cwd_id == 0) 113 + _exit(4); 114 + 115 + if (root_id != cwd_id) 116 + _exit(5); 117 + 118 + _exit(0); 119 + } 120 + 121 + _exit(wait_for_pid(inner)); 122 + } 123 + 124 + ASSERT_EQ(wait_for_pid(pid), 0); 125 + } 126 + 127 + /* 128 + * CLONE_EMPTY_MNTNS implies CLONE_NEWNS. Verify that it works without 129 + * explicitly setting CLONE_NEWNS (tests fork.c:2627-2630). 130 + */ 131 + TEST_F(clone3_empty_mntns, implies_newns) 132 + { 133 + pid_t pid, inner; 134 + 135 + pid = fork(); 136 + ASSERT_GE(pid, 0); 137 + 138 + if (pid == 0) { 139 + ssize_t parent_mounts; 140 + 141 + if (enter_userns()) 142 + _exit(1); 143 + 144 + /* Verify we have mounts in our current namespace. */ 145 + parent_mounts = count_mounts(); 146 + if (parent_mounts < 1) 147 + _exit(2); 148 + 149 + /* Only CLONE_EMPTY_MNTNS, no explicit CLONE_NEWNS. */ 150 + inner = clone3_empty_mntns(0); 151 + if (inner < 0) 152 + _exit(3); 153 + 154 + if (inner == 0) { 155 + if (count_mounts() != 1) 156 + _exit(4); 157 + 158 + _exit(0); 159 + } 160 + 161 + /* Parent still has its mounts. 
*/ 162 + if (count_mounts() != parent_mounts) 163 + _exit(5); 164 + 165 + _exit(wait_for_pid(inner)); 166 + } 167 + 168 + ASSERT_EQ(wait_for_pid(pid), 0); 169 + } 170 + 171 + /* 172 + * Helper macro: generate a test that clones with CLONE_EMPTY_MNTNS | 173 + * @extra_flags and verifies the child has exactly one mount. 174 + */ 175 + #define TEST_CLONE3_FLAGS(test_name, extra_flags) \ 176 + TEST_F(clone3_empty_mntns, test_name) \ 177 + { \ 178 + pid_t pid, inner; \ 179 + \ 180 + pid = fork(); \ 181 + ASSERT_GE(pid, 0); \ 182 + \ 183 + if (pid == 0) { \ 184 + if (enter_userns()) \ 185 + _exit(1); \ 186 + \ 187 + inner = clone3_empty_mntns(extra_flags); \ 188 + if (inner < 0) \ 189 + _exit(2); \ 190 + \ 191 + if (inner == 0) { \ 192 + if (count_mounts() != 1) \ 193 + _exit(3); \ 194 + _exit(0); \ 195 + } \ 196 + \ 197 + _exit(wait_for_pid(inner)); \ 198 + } \ 199 + \ 200 + ASSERT_EQ(wait_for_pid(pid), 0); \ 201 + } 202 + 203 + /* Redundant CLONE_NEWNS | CLONE_EMPTY_MNTNS should succeed. */ 204 + TEST_CLONE3_FLAGS(with_explicit_newns, CLONE_NEWNS) 205 + 206 + /* CLONE_EMPTY_MNTNS combined with CLONE_NEWUSER. */ 207 + TEST_CLONE3_FLAGS(with_newuser, CLONE_NEWUSER) 208 + 209 + /* CLONE_EMPTY_MNTNS combined with other namespace flags. */ 210 + TEST_CLONE3_FLAGS(with_other_ns_flags, CLONE_NEWUTS | CLONE_NEWIPC) 211 + 212 + /* 213 + * CLONE_EMPTY_MNTNS combined with CLONE_NEWPID. 214 + */ 215 + TEST_F(clone3_empty_mntns, with_newpid) 216 + { 217 + pid_t pid, inner; 218 + 219 + pid = fork(); 220 + ASSERT_GE(pid, 0); 221 + 222 + if (pid == 0) { 223 + if (enter_userns()) 224 + _exit(1); 225 + 226 + inner = clone3_empty_mntns(CLONE_NEWPID); 227 + if (inner < 0) 228 + _exit(2); 229 + 230 + if (inner == 0) { 231 + if (count_mounts() != 1) 232 + _exit(3); 233 + 234 + /* In a new PID namespace, getpid() returns 1. 
*/ 235 + if (getpid() != 1) 236 + _exit(4); 237 + 238 + _exit(0); 239 + } 240 + 241 + _exit(wait_for_pid(inner)); 242 + } 243 + 244 + ASSERT_EQ(wait_for_pid(pid), 0); 245 + } 246 + 247 + /* 248 + * CLONE_EMPTY_MNTNS | CLONE_FS must fail because the implied CLONE_NEWNS 249 + * and CLONE_FS are mutually exclusive (fork.c:1981). 250 + */ 251 + TEST_F(clone3_empty_mntns, with_clone_fs_fails) 252 + { 253 + pid_t pid; 254 + 255 + pid = fork(); 256 + ASSERT_GE(pid, 0); 257 + 258 + if (pid == 0) { 259 + struct __clone_args args = { 260 + .flags = CLONE_EMPTY_MNTNS | CLONE_FS, 261 + .exit_signal = SIGCHLD, 262 + }; 263 + pid_t ret; 264 + 265 + if (enter_userns()) 266 + _exit(1); 267 + 268 + ret = sys_clone3(&args, sizeof(args)); 269 + if (ret >= 0) { 270 + if (ret == 0) 271 + _exit(0); 272 + wait_for_pid(ret); 273 + _exit(2); 274 + } 275 + 276 + if (errno != EINVAL) 277 + _exit(3); 278 + 279 + _exit(0); 280 + } 281 + 282 + ASSERT_EQ(wait_for_pid(pid), 0); 283 + } 284 + 285 + /* 286 + * CLONE_EMPTY_MNTNS combined with CLONE_PIDFD returns a valid pidfd. 287 + */ 288 + TEST_F(clone3_empty_mntns, with_pidfd) 289 + { 290 + pid_t pid; 291 + 292 + pid = fork(); 293 + ASSERT_GE(pid, 0); 294 + 295 + if (pid == 0) { 296 + struct __clone_args args = { 297 + .flags = CLONE_EMPTY_MNTNS | CLONE_PIDFD, 298 + .exit_signal = SIGCHLD, 299 + }; 300 + int pidfd = -1; 301 + pid_t inner; 302 + 303 + if (enter_userns()) 304 + _exit(1); 305 + 306 + args.pidfd = (uintptr_t)&pidfd; 307 + 308 + inner = sys_clone3(&args, sizeof(args)); 309 + if (inner < 0) 310 + _exit(2); 311 + 312 + if (inner == 0) { 313 + if (count_mounts() != 1) 314 + _exit(3); 315 + 316 + _exit(0); 317 + } 318 + 319 + /* Verify we got a valid pidfd. */ 320 + if (pidfd < 0) 321 + _exit(4); 322 + 323 + close(pidfd); 324 + _exit(wait_for_pid(inner)); 325 + } 326 + 327 + ASSERT_EQ(wait_for_pid(pid), 0); 328 + } 329 + 330 + /* 331 + * clone3 without CAP_SYS_ADMIN must fail with EPERM. 
332 + */ 333 + TEST_F(clone3_empty_mntns, eperm_without_caps) 334 + { 335 + pid_t pid; 336 + 337 + pid = fork(); 338 + ASSERT_GE(pid, 0); 339 + 340 + if (pid == 0) { 341 + pid_t ret; 342 + 343 + /* Skip if already root. */ 344 + if (getuid() == 0) 345 + _exit(0); 346 + 347 + ret = clone3_empty_mntns(0); 348 + if (ret >= 0) { 349 + if (ret == 0) 350 + _exit(0); 351 + wait_for_pid(ret); 352 + _exit(1); 353 + } 354 + 355 + if (errno != EPERM) 356 + _exit(2); 357 + 358 + _exit(0); 359 + } 360 + 361 + ASSERT_EQ(wait_for_pid(pid), 0); 362 + } 363 + 364 + /* 365 + * Parent's mount namespace is unaffected after clone3 with CLONE_EMPTY_MNTNS. 366 + */ 367 + TEST_F(clone3_empty_mntns, parent_unchanged) 368 + { 369 + pid_t pid; 370 + 371 + pid = fork(); 372 + ASSERT_GE(pid, 0); 373 + 374 + if (pid == 0) { 375 + ssize_t nr_before, nr_after; 376 + pid_t inner; 377 + 378 + if (enter_userns()) 379 + _exit(1); 380 + 381 + nr_before = count_mounts(); 382 + if (nr_before < 1) 383 + _exit(2); 384 + 385 + inner = clone3_empty_mntns(0); 386 + if (inner < 0) 387 + _exit(3); 388 + 389 + if (inner == 0) 390 + _exit(0); 391 + 392 + if (wait_for_pid(inner) != 0) 393 + _exit(4); 394 + 395 + nr_after = count_mounts(); 396 + if (nr_after != nr_before) 397 + _exit(5); 398 + 399 + _exit(0); 400 + } 401 + 402 + ASSERT_EQ(wait_for_pid(pid), 0); 403 + } 404 + 405 + /* 406 + * Parent with many mounts: child still gets exactly 1 mount. 
407 + */ 408 + TEST_F(clone3_empty_mntns, many_parent_mounts) 409 + { 410 + pid_t pid; 411 + 412 + pid = fork(); 413 + ASSERT_GE(pid, 0); 414 + 415 + if (pid == 0) { 416 + char tmpdir[] = "/tmp/clone3_mntns_test.XXXXXX"; 417 + pid_t inner; 418 + int i; 419 + 420 + if (enter_userns()) 421 + _exit(1); 422 + 423 + if (unshare(CLONE_NEWNS)) 424 + _exit(2); 425 + 426 + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) 427 + _exit(3); 428 + 429 + if (!mkdtemp(tmpdir)) 430 + _exit(4); 431 + 432 + if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) 433 + _exit(5); 434 + 435 + for (i = 0; i < 5; i++) { 436 + char subdir[256]; 437 + 438 + snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i); 439 + if (mkdir(subdir, 0755) && errno != EEXIST) 440 + _exit(6); 441 + if (mount(subdir, subdir, NULL, MS_BIND, NULL)) 442 + _exit(7); 443 + } 444 + 445 + if (count_mounts() < 5) 446 + _exit(8); 447 + 448 + inner = clone3_empty_mntns(0); 449 + if (inner < 0) 450 + _exit(9); 451 + 452 + if (inner == 0) { 453 + if (count_mounts() != 1) 454 + _exit(10); 455 + 456 + _exit(0); 457 + } 458 + 459 + _exit(wait_for_pid(inner)); 460 + } 461 + 462 + ASSERT_EQ(wait_for_pid(pid), 0); 463 + } 464 + 465 + /* 466 + * Verify the child's root mount is nullfs with expected statmount properties. 467 + */ 468 + TEST_F(clone3_empty_mntns, mount_properties) 469 + { 470 + pid_t pid; 471 + 472 + pid = fork(); 473 + ASSERT_GE(pid, 0); 474 + 475 + if (pid == 0) { 476 + pid_t inner; 477 + 478 + if (enter_userns()) 479 + _exit(1); 480 + 481 + inner = clone3_empty_mntns(0); 482 + if (inner < 0) 483 + _exit(2); 484 + 485 + if (inner == 0) { 486 + struct statmount *sm; 487 + uint64_t root_id; 488 + 489 + root_id = get_unique_mnt_id("/"); 490 + if (!root_id) 491 + _exit(3); 492 + 493 + sm = statmount_alloc(root_id, 0, 494 + STATMOUNT_MNT_BASIC | 495 + STATMOUNT_MNT_POINT | 496 + STATMOUNT_FS_TYPE); 497 + if (!sm) 498 + _exit(4); 499 + 500 + /* Root mount point is "/". 
*/ 501 + if (!(sm->mask & STATMOUNT_MNT_POINT)) 502 + _exit(5); 503 + if (strcmp(sm->str + sm->mnt_point, "/") != 0) 504 + _exit(6); 505 + 506 + /* Filesystem type is nullfs. */ 507 + if (!(sm->mask & STATMOUNT_FS_TYPE)) 508 + _exit(7); 509 + if (strcmp(sm->str + sm->fs_type, "nullfs") != 0) 510 + _exit(8); 511 + 512 + /* Root mount is its own parent. */ 513 + if (!(sm->mask & STATMOUNT_MNT_BASIC)) 514 + _exit(9); 515 + if (sm->mnt_parent_id != sm->mnt_id) 516 + _exit(10); 517 + 518 + free(sm); 519 + _exit(0); 520 + } 521 + 522 + _exit(wait_for_pid(inner)); 523 + } 524 + 525 + ASSERT_EQ(wait_for_pid(pid), 0); 526 + } 527 + 528 + /* 529 + * Listmount returns only the root mount in the child's empty namespace. 530 + */ 531 + TEST_F(clone3_empty_mntns, listmount_single_entry) 532 + { 533 + pid_t pid; 534 + 535 + pid = fork(); 536 + ASSERT_GE(pid, 0); 537 + 538 + if (pid == 0) { 539 + pid_t inner; 540 + 541 + if (enter_userns()) 542 + _exit(1); 543 + 544 + inner = clone3_empty_mntns(0); 545 + if (inner < 0) 546 + _exit(2); 547 + 548 + if (inner == 0) { 549 + uint64_t list[16]; 550 + ssize_t nr_mounts; 551 + uint64_t root_id; 552 + 553 + nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 16, 0); 554 + if (nr_mounts != 1) 555 + _exit(3); 556 + 557 + root_id = get_unique_mnt_id("/"); 558 + if (!root_id) 559 + _exit(4); 560 + 561 + if (list[0] != root_id) 562 + _exit(5); 563 + 564 + _exit(0); 565 + } 566 + 567 + _exit(wait_for_pid(inner)); 568 + } 569 + 570 + ASSERT_EQ(wait_for_pid(pid), 0); 571 + } 572 + 573 + /* 574 + * Child can mount tmpfs over nullfs root (the primary container use case). 575 + * 576 + * Uses the new mount API (fsopen/fsmount/move_mount) because resolving 577 + * "/" returns the process root directly without following overmounts. 578 + * The mount fd from fsmount lets us fchdir + chroot into the new tmpfs. 
579 + */ 580 + TEST_F(clone3_empty_mntns, child_overmount_tmpfs) 581 + { 582 + pid_t pid; 583 + 584 + pid = fork(); 585 + ASSERT_GE(pid, 0); 586 + 587 + if (pid == 0) { 588 + pid_t inner; 589 + 590 + if (enter_userns()) 591 + _exit(1); 592 + 593 + inner = clone3_empty_mntns(0); 594 + if (inner < 0) 595 + _exit(2); 596 + 597 + if (inner == 0) { 598 + struct statmount *sm; 599 + uint64_t root_id; 600 + int fd, fsfd, mntfd; 601 + 602 + if (count_mounts() != 1) 603 + _exit(3); 604 + 605 + /* Verify root is nullfs. */ 606 + root_id = get_unique_mnt_id("/"); 607 + if (!root_id) 608 + _exit(4); 609 + 610 + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE); 611 + if (!sm) 612 + _exit(5); 613 + if (!(sm->mask & STATMOUNT_FS_TYPE)) 614 + _exit(6); 615 + if (strcmp(sm->str + sm->fs_type, "nullfs") != 0) 616 + _exit(7); 617 + free(sm); 618 + 619 + /* Create tmpfs via the new mount API. */ 620 + fsfd = sys_fsopen("tmpfs", 0); 621 + if (fsfd < 0) 622 + _exit(8); 623 + 624 + if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, 625 + "size", "1M", 0)) { 626 + close(fsfd); 627 + _exit(9); 628 + } 629 + 630 + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, 631 + NULL, NULL, 0)) { 632 + close(fsfd); 633 + _exit(10); 634 + } 635 + 636 + mntfd = sys_fsmount(fsfd, 0, 0); 637 + close(fsfd); 638 + if (mntfd < 0) 639 + _exit(11); 640 + 641 + /* Attach tmpfs to "/". */ 642 + if (sys_move_mount(mntfd, "", AT_FDCWD, "/", 643 + MOVE_MOUNT_F_EMPTY_PATH)) { 644 + close(mntfd); 645 + _exit(12); 646 + } 647 + 648 + if (count_mounts() != 2) { 649 + close(mntfd); 650 + _exit(13); 651 + } 652 + 653 + /* Enter the tmpfs. */ 654 + if (fchdir(mntfd)) { 655 + close(mntfd); 656 + _exit(14); 657 + } 658 + 659 + if (chroot(".")) { 660 + close(mntfd); 661 + _exit(15); 662 + } 663 + 664 + close(mntfd); 665 + 666 + /* Verify "/" is now tmpfs. 
*/ 667 + root_id = get_unique_mnt_id("/"); 668 + if (!root_id) 669 + _exit(16); 670 + 671 + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE); 672 + if (!sm) 673 + _exit(17); 674 + if (!(sm->mask & STATMOUNT_FS_TYPE)) 675 + _exit(18); 676 + if (strcmp(sm->str + sm->fs_type, "tmpfs") != 0) 677 + _exit(19); 678 + free(sm); 679 + 680 + /* Verify tmpfs is writable. */ 681 + fd = open("/testfile", O_CREAT | O_RDWR, 0644); 682 + if (fd < 0) 683 + _exit(20); 684 + 685 + if (write(fd, "test", 4) != 4) { 686 + close(fd); 687 + _exit(21); 688 + } 689 + close(fd); 690 + 691 + if (access("/testfile", F_OK)) 692 + _exit(22); 693 + 694 + _exit(0); 695 + } 696 + 697 + _exit(wait_for_pid(inner)); 698 + } 699 + 700 + ASSERT_EQ(wait_for_pid(pid), 0); 701 + } 702 + 703 + /* 704 + * Multiple clone3 calls with CLONE_EMPTY_MNTNS produce children with 705 + * distinct mount namespace root mount IDs. 706 + */ 707 + TEST_F(clone3_empty_mntns, repeated) 708 + { 709 + pid_t pid; 710 + 711 + pid = fork(); 712 + ASSERT_GE(pid, 0); 713 + 714 + if (pid == 0) { 715 + int pipe1[2], pipe2[2]; 716 + uint64_t id1 = 0, id2 = 0; 717 + pid_t inner1, inner2; 718 + 719 + if (enter_userns()) 720 + _exit(1); 721 + 722 + if (pipe(pipe1) || pipe(pipe2)) 723 + _exit(2); 724 + 725 + inner1 = clone3_empty_mntns(0); 726 + if (inner1 < 0) 727 + _exit(3); 728 + 729 + if (inner1 == 0) { 730 + uint64_t root_id; 731 + 732 + close(pipe1[0]); 733 + root_id = get_unique_mnt_id("/"); 734 + if (write(pipe1[1], &root_id, sizeof(root_id)) != sizeof(root_id)) 735 + _exit(1); 736 + close(pipe1[1]); 737 + _exit(0); 738 + } 739 + 740 + inner2 = clone3_empty_mntns(0); 741 + if (inner2 < 0) 742 + _exit(4); 743 + 744 + if (inner2 == 0) { 745 + uint64_t root_id; 746 + 747 + close(pipe2[0]); 748 + root_id = get_unique_mnt_id("/"); 749 + if (write(pipe2[1], &root_id, sizeof(root_id)) != sizeof(root_id)) 750 + _exit(1); 751 + close(pipe2[1]); 752 + _exit(0); 753 + } 754 + 755 + close(pipe1[1]); 756 + close(pipe2[1]); 757 + 758 + if 
(read(pipe1[0], &id1, sizeof(id1)) != sizeof(id1)) 759 + _exit(5); 760 + if (read(pipe2[0], &id2, sizeof(id2)) != sizeof(id2)) 761 + _exit(6); 762 + 763 + close(pipe1[0]); 764 + close(pipe2[0]); 765 + 766 + if (wait_for_pid(inner1) || wait_for_pid(inner2)) 767 + _exit(7); 768 + 769 + /* Each child must have a distinct root mount ID. */ 770 + if (id1 == 0 || id2 == 0) 771 + _exit(8); 772 + if (id1 == id2) 773 + _exit(9); 774 + 775 + _exit(0); 776 + } 777 + 778 + ASSERT_EQ(wait_for_pid(pid), 0); 779 + } 780 + 781 + /* 782 + * Verify setns() into a child's empty mount namespace works. 783 + */ 784 + TEST_F(clone3_empty_mntns, setns_into_child_mntns) 785 + { 786 + pid_t pid; 787 + 788 + pid = fork(); 789 + ASSERT_GE(pid, 0); 790 + 791 + if (pid == 0) { 792 + int pipe_fd[2]; 793 + pid_t inner; 794 + char c; 795 + 796 + if (enter_userns()) 797 + _exit(1); 798 + 799 + if (pipe(pipe_fd)) 800 + _exit(2); 801 + 802 + inner = clone3_empty_mntns(0); 803 + if (inner < 0) 804 + _exit(3); 805 + 806 + if (inner == 0) { 807 + /* Signal parent we're ready. */ 808 + close(pipe_fd[0]); 809 + if (write(pipe_fd[1], "r", 1) != 1) 810 + _exit(1); 811 + 812 + /* 813 + * Wait for parent to finish. Reading from our 814 + * write end will block until the parent closes 815 + * its read end, giving us an implicit barrier. 816 + */ 817 + if (read(pipe_fd[1], &c, 1) < 0) 818 + ; 819 + close(pipe_fd[1]); 820 + _exit(0); 821 + } 822 + 823 + close(pipe_fd[1]); 824 + 825 + /* Wait for child to be ready. */ 826 + if (read(pipe_fd[0], &c, 1) != 1) 827 + _exit(4); 828 + 829 + /* Open child's mount namespace. */ 830 + { 831 + char path[64]; 832 + int mntns_fd; 833 + 834 + snprintf(path, sizeof(path), "/proc/%d/ns/mnt", inner); 835 + mntns_fd = open(path, O_RDONLY); 836 + if (mntns_fd < 0) 837 + _exit(5); 838 + 839 + if (setns(mntns_fd, CLONE_NEWNS)) 840 + _exit(6); 841 + 842 + close(mntns_fd); 843 + } 844 + 845 + /* Now we should be in the child's empty mntns. 
*/ 846 + if (count_mounts() != 1) 847 + _exit(7); 848 + 849 + close(pipe_fd[0]); 850 + _exit(wait_for_pid(inner)); 851 + } 852 + 853 + ASSERT_EQ(wait_for_pid(pid), 0); 854 + } 855 + 856 + /* 857 + * Tests below do not require CLONE_EMPTY_MNTNS support. 858 + */ 859 + 860 + /* 861 + * Unknown 64-bit flags beyond the known set are rejected. 862 + */ 863 + TEST(unknown_flags_rejected) 864 + { 865 + pid_t pid; 866 + 867 + pid = fork(); 868 + ASSERT_GE(pid, 0); 869 + 870 + if (pid == 0) { 871 + struct __clone_args args = { 872 + .flags = 0x800000000ULL, 873 + .exit_signal = SIGCHLD, 874 + }; 875 + pid_t ret; 876 + 877 + ret = sys_clone3(&args, sizeof(args)); 878 + if (ret >= 0) { 879 + if (ret == 0) 880 + _exit(0); 881 + wait_for_pid(ret); 882 + _exit(1); 883 + } 884 + 885 + if (errno != EINVAL) 886 + _exit(2); 887 + 888 + _exit(0); 889 + } 890 + 891 + ASSERT_EQ(wait_for_pid(pid), 0); 892 + } 893 + 894 + /* 895 + * Regular clone3 with CLONE_NEWNS (without CLONE_EMPTY_MNTNS) still 896 + * copies the full mount tree. 897 + */ 898 + TEST(clone3_newns_full_copy) 899 + { 900 + pid_t pid; 901 + 902 + pid = fork(); 903 + ASSERT_GE(pid, 0); 904 + 905 + if (pid == 0) { 906 + struct __clone_args args = { 907 + .flags = CLONE_NEWNS, 908 + .exit_signal = SIGCHLD, 909 + }; 910 + ssize_t parent_mounts; 911 + pid_t inner; 912 + 913 + if (enter_userns()) 914 + _exit(1); 915 + 916 + parent_mounts = count_mounts(); 917 + if (parent_mounts < 1) 918 + _exit(2); 919 + 920 + inner = sys_clone3(&args, sizeof(args)); 921 + if (inner < 0) 922 + _exit(3); 923 + 924 + if (inner == 0) { 925 + /* Full copy should have at least as many mounts. */ 926 + if (count_mounts() < parent_mounts) 927 + _exit(1); 928 + 929 + _exit(0); 930 + } 931 + 932 + _exit(wait_for_pid(inner)); 933 + } 934 + 935 + ASSERT_EQ(wait_for_pid(pid), 0); 936 + } 937 + 938 + TEST_HARNESS_MAIN
+50
tools/testing/selftests/filesystems/empty_mntns/empty_mntns.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + #ifndef EMPTY_MNTNS_H 3 + #define EMPTY_MNTNS_H 4 + 5 + #include <errno.h> 6 + #include <stdlib.h> 7 + 8 + #include "../statmount/statmount.h" 9 + 10 + #ifndef UNSHARE_EMPTY_MNTNS 11 + #define UNSHARE_EMPTY_MNTNS 0x00100000 12 + #endif 13 + 14 + #ifndef CLONE_EMPTY_MNTNS 15 + #define CLONE_EMPTY_MNTNS (1ULL << 37) 16 + #endif 17 + 18 + static inline ssize_t count_mounts(void) 19 + { 20 + uint64_t list[4096]; 21 + 22 + return listmount(LSMT_ROOT, 0, 0, list, sizeof(list) / sizeof(list[0]), 0); 23 + } 24 + 25 + static inline struct statmount *statmount_alloc(uint64_t mnt_id, 26 + uint64_t mnt_ns_id, 27 + uint64_t mask) 28 + { 29 + size_t bufsize = 1 << 15; 30 + struct statmount *buf; 31 + int ret; 32 + 33 + for (;;) { 34 + buf = malloc(bufsize); 35 + if (!buf) 36 + return NULL; 37 + 38 + ret = statmount(mnt_id, mnt_ns_id, 0, mask, buf, bufsize, 0); 39 + if (ret == 0) 40 + return buf; 41 + 42 + free(buf); 43 + if (errno != EOVERFLOW) 44 + return NULL; 45 + 46 + bufsize <<= 1; 47 + } 48 + } 49 + 50 + #endif /* EMPTY_MNTNS_H */
+725
tools/testing/selftests/filesystems/empty_mntns/empty_mntns_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Tests for empty mount namespace creation via UNSHARE_EMPTY_MNTNS 4 + * 5 + * Copyright (c) 2024 Christian Brauner <brauner@kernel.org> 6 + */ 7 + 8 + #define _GNU_SOURCE 9 + #include <fcntl.h> 10 + #include <linux/mount.h> 11 + #include <linux/stat.h> 12 + #include <sched.h> 13 + #include <stdio.h> 14 + #include <string.h> 15 + #include <sys/mount.h> 16 + #include <sys/stat.h> 17 + #include <sys/types.h> 18 + #include <sys/wait.h> 19 + #include <unistd.h> 20 + 21 + #include "../utils.h" 22 + #include "../wrappers.h" 23 + #include "empty_mntns.h" 24 + #include "kselftest_harness.h" 25 + 26 + static bool unshare_empty_mntns_supported(void) 27 + { 28 + pid_t pid; 29 + int status; 30 + 31 + pid = fork(); 32 + if (pid < 0) 33 + return false; 34 + 35 + if (pid == 0) { 36 + if (enter_userns()) 37 + _exit(1); 38 + 39 + if (unshare(UNSHARE_EMPTY_MNTNS) && errno == EINVAL) 40 + _exit(1); 41 + _exit(0); 42 + } 43 + 44 + if (waitpid(pid, &status, 0) != pid) 45 + return false; 46 + 47 + if (!WIFEXITED(status)) 48 + return false; 49 + 50 + return WEXITSTATUS(status) == 0; 51 + } 52 + 53 + 54 + FIXTURE(empty_mntns) {}; 55 + 56 + FIXTURE_SETUP(empty_mntns) 57 + { 58 + if (!unshare_empty_mntns_supported()) 59 + SKIP(return, "UNSHARE_EMPTY_MNTNS not supported"); 60 + } 61 + 62 + FIXTURE_TEARDOWN(empty_mntns) {} 63 + 64 + /* Verify unshare succeeds, produces exactly 1 mount, and root == cwd */ 65 + TEST_F(empty_mntns, basic) 66 + { 67 + pid_t pid; 68 + 69 + pid = fork(); 70 + ASSERT_GE(pid, 0); 71 + 72 + if (pid == 0) { 73 + uint64_t root_id, cwd_id; 74 + 75 + if (enter_userns()) 76 + _exit(1); 77 + 78 + if (unshare(UNSHARE_EMPTY_MNTNS)) 79 + _exit(2); 80 + 81 + if (count_mounts() != 1) 82 + _exit(3); 83 + 84 + root_id = get_unique_mnt_id("/"); 85 + cwd_id = get_unique_mnt_id("."); 86 + if (root_id == 0 || cwd_id == 0) 87 + _exit(4); 88 + 89 + if (root_id != cwd_id) 90 + _exit(5); 91 + 92 + _exit(0); 93 + } 94 + 95 + 
ASSERT_EQ(wait_for_pid(pid), 0); 96 + } 97 + 98 + /* 99 + * UNSHARE_EMPTY_MNTNS combined with CLONE_NEWUSER. 100 + * 101 + * The user namespace must be created first so /proc is still accessible 102 + * for writing uid_map/gid_map. The empty mount namespace is created 103 + * afterwards. 104 + */ 105 + TEST_F(empty_mntns, with_clone_newuser) 106 + { 107 + pid_t pid; 108 + 109 + pid = fork(); 110 + ASSERT_GE(pid, 0); 111 + 112 + if (pid == 0) { 113 + uid_t uid = getuid(); 114 + gid_t gid = getgid(); 115 + char map[100]; 116 + 117 + if (unshare(CLONE_NEWUSER)) 118 + _exit(1); 119 + 120 + snprintf(map, sizeof(map), "0 %d 1", uid); 121 + if (write_file("/proc/self/uid_map", map)) 122 + _exit(2); 123 + 124 + if (write_file("/proc/self/setgroups", "deny")) 125 + _exit(3); 126 + 127 + snprintf(map, sizeof(map), "0 %d 1", gid); 128 + if (write_file("/proc/self/gid_map", map)) 129 + _exit(4); 130 + 131 + if (unshare(UNSHARE_EMPTY_MNTNS)) 132 + _exit(5); 133 + 134 + if (count_mounts() != 1) 135 + _exit(6); 136 + 137 + _exit(0); 138 + } 139 + 140 + ASSERT_EQ(wait_for_pid(pid), 0); 141 + } 142 + 143 + /* UNSHARE_EMPTY_MNTNS combined with other namespace flags */ 144 + TEST_F(empty_mntns, with_other_ns_flags) 145 + { 146 + pid_t pid; 147 + 148 + pid = fork(); 149 + ASSERT_GE(pid, 0); 150 + 151 + if (pid == 0) { 152 + if (enter_userns()) 153 + _exit(1); 154 + 155 + if (unshare(UNSHARE_EMPTY_MNTNS | CLONE_NEWUTS | CLONE_NEWIPC)) 156 + _exit(2); 157 + 158 + if (count_mounts() != 1) 159 + _exit(3); 160 + 161 + _exit(0); 162 + } 163 + 164 + ASSERT_EQ(wait_for_pid(pid), 0); 165 + } 166 + 167 + /* EPERM without proper capabilities */ 168 + TEST_F(empty_mntns, eperm_without_caps) 169 + { 170 + pid_t pid; 171 + 172 + pid = fork(); 173 + ASSERT_GE(pid, 0); 174 + 175 + if (pid == 0) { 176 + /* Skip if already root */ 177 + if (getuid() == 0) 178 + _exit(0); 179 + 180 + if (unshare(UNSHARE_EMPTY_MNTNS) == 0) 181 + _exit(1); 182 + 183 + if (errno != EPERM) 184 + _exit(2); 185 + 186 + 
_exit(0); 187 + } 188 + 189 + ASSERT_EQ(wait_for_pid(pid), 0); 190 + } 191 + 192 + /* Many source mounts still result in exactly 1 mount */ 193 + TEST_F(empty_mntns, many_source_mounts) 194 + { 195 + pid_t pid; 196 + 197 + pid = fork(); 198 + ASSERT_GE(pid, 0); 199 + 200 + if (pid == 0) { 201 + char tmpdir[] = "/tmp/empty_mntns_test.XXXXXX"; 202 + int i; 203 + 204 + if (enter_userns()) 205 + _exit(1); 206 + 207 + if (unshare(CLONE_NEWNS)) 208 + _exit(2); 209 + 210 + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) 211 + _exit(3); 212 + 213 + if (!mkdtemp(tmpdir)) 214 + _exit(4); 215 + 216 + if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) 217 + _exit(5); 218 + 219 + for (i = 0; i < 5; i++) { 220 + char subdir[256]; 221 + 222 + snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i); 223 + if (mkdir(subdir, 0755) && errno != EEXIST) 224 + _exit(6); 225 + if (mount(subdir, subdir, NULL, MS_BIND, NULL)) 226 + _exit(7); 227 + } 228 + 229 + if (count_mounts() < 5) 230 + _exit(8); 231 + 232 + if (unshare(UNSHARE_EMPTY_MNTNS)) 233 + _exit(9); 234 + 235 + if (count_mounts() != 1) 236 + _exit(10); 237 + 238 + _exit(0); 239 + } 240 + 241 + ASSERT_EQ(wait_for_pid(pid), 0); 242 + } 243 + 244 + /* CWD on a different mount gets reset to root */ 245 + TEST_F(empty_mntns, cwd_reset) 246 + { 247 + pid_t pid; 248 + 249 + pid = fork(); 250 + ASSERT_GE(pid, 0); 251 + 252 + if (pid == 0) { 253 + char tmpdir[] = "/tmp/empty_mntns_cwd.XXXXXX"; 254 + uint64_t root_id, cwd_id; 255 + struct statmount *sm; 256 + 257 + if (enter_userns()) 258 + _exit(1); 259 + 260 + if (unshare(CLONE_NEWNS)) 261 + _exit(2); 262 + 263 + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) 264 + _exit(3); 265 + 266 + if (!mkdtemp(tmpdir)) 267 + _exit(4); 268 + 269 + if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) 270 + _exit(5); 271 + 272 + if (chdir(tmpdir)) 273 + _exit(6); 274 + 275 + if (unshare(UNSHARE_EMPTY_MNTNS)) 276 + _exit(7); 277 + 278 + root_id = get_unique_mnt_id("/"); 279 + cwd_id = 
get_unique_mnt_id("."); 280 + if (root_id == 0 || cwd_id == 0) 281 + _exit(8); 282 + 283 + if (root_id != cwd_id) 284 + _exit(9); 285 + 286 + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT); 287 + if (!sm) 288 + _exit(10); 289 + 290 + if (strcmp(sm->str + sm->mnt_point, "/") != 0) 291 + _exit(11); 292 + 293 + free(sm); 294 + _exit(0); 295 + } 296 + 297 + ASSERT_EQ(wait_for_pid(pid), 0); 298 + } 299 + 300 + /* Verify statmount properties of the root mount */ 301 + TEST_F(empty_mntns, mount_properties) 302 + { 303 + pid_t pid; 304 + 305 + pid = fork(); 306 + ASSERT_GE(pid, 0); 307 + 308 + if (pid == 0) { 309 + struct statmount *sm; 310 + uint64_t root_id; 311 + 312 + if (enter_userns()) 313 + _exit(1); 314 + 315 + if (unshare(UNSHARE_EMPTY_MNTNS)) 316 + _exit(2); 317 + 318 + root_id = get_unique_mnt_id("/"); 319 + if (!root_id) 320 + _exit(3); 321 + 322 + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_ROOT | 323 + STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE); 324 + if (!sm) 325 + _exit(4); 326 + 327 + if (!(sm->mask & STATMOUNT_MNT_POINT)) 328 + _exit(5); 329 + 330 + if (strcmp(sm->str + sm->mnt_point, "/") != 0) 331 + _exit(6); 332 + 333 + if (!(sm->mask & STATMOUNT_MNT_BASIC)) 334 + _exit(7); 335 + 336 + if (sm->mnt_id != root_id) 337 + _exit(8); 338 + 339 + free(sm); 340 + _exit(0); 341 + } 342 + 343 + ASSERT_EQ(wait_for_pid(pid), 0); 344 + } 345 + 346 + /* Consecutive UNSHARE_EMPTY_MNTNS calls produce new namespaces */ 347 + TEST_F(empty_mntns, repeated_unshare) 348 + { 349 + pid_t pid; 350 + 351 + pid = fork(); 352 + ASSERT_GE(pid, 0); 353 + 354 + if (pid == 0) { 355 + uint64_t first_root_id, second_root_id; 356 + 357 + if (enter_userns()) 358 + _exit(1); 359 + 360 + if (unshare(UNSHARE_EMPTY_MNTNS)) 361 + _exit(2); 362 + 363 + if (count_mounts() != 1) 364 + _exit(3); 365 + 366 + first_root_id = get_unique_mnt_id("/"); 367 + 368 + if (unshare(UNSHARE_EMPTY_MNTNS)) 369 + _exit(4); 370 + 371 + if (count_mounts() != 1) 
372 + _exit(5); 373 + 374 + second_root_id = get_unique_mnt_id("/"); 375 + 376 + if (first_root_id == second_root_id) 377 + _exit(6); 378 + 379 + _exit(0); 380 + } 381 + 382 + ASSERT_EQ(wait_for_pid(pid), 0); 383 + } 384 + 385 + /* Root mount's parent is itself */ 386 + TEST_F(empty_mntns, root_is_own_parent) 387 + { 388 + pid_t pid; 389 + 390 + pid = fork(); 391 + ASSERT_GE(pid, 0); 392 + 393 + if (pid == 0) { 394 + struct statmount sm; 395 + uint64_t root_id; 396 + 397 + if (enter_userns()) 398 + _exit(1); 399 + 400 + if (unshare(UNSHARE_EMPTY_MNTNS)) 401 + _exit(2); 402 + 403 + root_id = get_unique_mnt_id("/"); 404 + if (!root_id) 405 + _exit(3); 406 + 407 + if (statmount(root_id, 0, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0) < 0) 408 + _exit(4); 409 + 410 + if (!(sm.mask & STATMOUNT_MNT_BASIC)) 411 + _exit(5); 412 + 413 + if (sm.mnt_parent_id != sm.mnt_id) 414 + _exit(6); 415 + 416 + _exit(0); 417 + } 418 + 419 + ASSERT_EQ(wait_for_pid(pid), 0); 420 + } 421 + 422 + /* Listmount returns only the root mount */ 423 + TEST_F(empty_mntns, listmount_single_entry) 424 + { 425 + pid_t pid; 426 + 427 + pid = fork(); 428 + ASSERT_GE(pid, 0); 429 + 430 + if (pid == 0) { 431 + uint64_t list[16]; 432 + ssize_t nr_mounts; 433 + uint64_t root_id; 434 + 435 + if (enter_userns()) 436 + _exit(1); 437 + 438 + if (unshare(UNSHARE_EMPTY_MNTNS)) 439 + _exit(2); 440 + 441 + nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 16, 0); 442 + if (nr_mounts != 1) 443 + _exit(3); 444 + 445 + root_id = get_unique_mnt_id("/"); 446 + if (!root_id) 447 + _exit(4); 448 + 449 + if (list[0] != root_id) 450 + _exit(5); 451 + 452 + _exit(0); 453 + } 454 + 455 + ASSERT_EQ(wait_for_pid(pid), 0); 456 + } 457 + 458 + /* 459 + * Mount tmpfs over nullfs root to build a writable filesystem from scratch. 460 + * This exercises the intended usage pattern: create an empty mount namespace 461 + * (which has a nullfs root), then mount a real filesystem over it. 
462 + * 463 + * Because resolving "/" returns the process root directly (via nd_jump_root) 464 + * without following overmounts, we use the new mount API (fsopen/fsmount) 465 + * to obtain a mount fd, then fchdir + chroot to enter the new filesystem. 466 + */ 467 + TEST_F(empty_mntns, overmount_tmpfs) 468 + { 469 + pid_t pid; 470 + 471 + pid = fork(); 472 + ASSERT_GE(pid, 0); 473 + 474 + if (pid == 0) { 475 + struct statmount *sm; 476 + uint64_t root_id, cwd_id; 477 + int fd, fsfd, mntfd; 478 + 479 + if (enter_userns()) 480 + _exit(1); 481 + 482 + if (unshare(UNSHARE_EMPTY_MNTNS)) 483 + _exit(2); 484 + 485 + if (count_mounts() != 1) 486 + _exit(3); 487 + 488 + root_id = get_unique_mnt_id("/"); 489 + if (!root_id) 490 + _exit(4); 491 + 492 + /* Verify root is nullfs */ 493 + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE); 494 + if (!sm) 495 + _exit(5); 496 + 497 + if (!(sm->mask & STATMOUNT_FS_TYPE)) 498 + _exit(6); 499 + 500 + if (strcmp(sm->str + sm->fs_type, "nullfs") != 0) 501 + _exit(7); 502 + 503 + free(sm); 504 + 505 + cwd_id = get_unique_mnt_id("."); 506 + if (!cwd_id || root_id != cwd_id) 507 + _exit(8); 508 + 509 + /* 510 + * nullfs root is immutable. open(O_CREAT) returns ENOENT 511 + * because empty_dir_lookup() returns -ENOENT before the 512 + * IS_IMMUTABLE permission check in may_o_create() is reached. 513 + */ 514 + fd = open("/test", O_CREAT | O_RDWR, 0644); 515 + if (fd >= 0) { 516 + close(fd); 517 + _exit(9); 518 + } 519 + if (errno != ENOENT) 520 + _exit(10); 521 + 522 + /* 523 + * Use the new mount API to create tmpfs and get a mount fd. 524 + * We need the fd because after attaching the tmpfs on top of 525 + * "/", path resolution of "/" still returns the process root 526 + * (nullfs) without following the overmount. The mount fd 527 + * lets us fchdir + chroot into the tmpfs. 
528 + */ 529 + fsfd = sys_fsopen("tmpfs", 0); 530 + if (fsfd < 0) 531 + _exit(11); 532 + 533 + if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "1M", 0)) { 534 + close(fsfd); 535 + _exit(12); 536 + } 537 + 538 + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) { 539 + close(fsfd); 540 + _exit(13); 541 + } 542 + 543 + mntfd = sys_fsmount(fsfd, 0, 0); 544 + close(fsfd); 545 + if (mntfd < 0) 546 + _exit(14); 547 + 548 + if (sys_move_mount(mntfd, "", AT_FDCWD, "/", 549 + MOVE_MOUNT_F_EMPTY_PATH)) { 550 + close(mntfd); 551 + _exit(15); 552 + } 553 + 554 + if (count_mounts() != 2) { 555 + close(mntfd); 556 + _exit(16); 557 + } 558 + 559 + /* Enter the tmpfs via the mount fd */ 560 + if (fchdir(mntfd)) { 561 + close(mntfd); 562 + _exit(17); 563 + } 564 + 565 + if (chroot(".")) { 566 + close(mntfd); 567 + _exit(18); 568 + } 569 + 570 + close(mntfd); 571 + 572 + /* Verify "/" now resolves to tmpfs */ 573 + root_id = get_unique_mnt_id("/"); 574 + if (!root_id) 575 + _exit(19); 576 + 577 + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE); 578 + if (!sm) 579 + _exit(20); 580 + 581 + if (!(sm->mask & STATMOUNT_FS_TYPE)) 582 + _exit(21); 583 + 584 + if (strcmp(sm->str + sm->fs_type, "tmpfs") != 0) 585 + _exit(22); 586 + 587 + free(sm); 588 + 589 + /* Verify tmpfs is writable */ 590 + fd = open("/testfile", O_CREAT | O_RDWR, 0644); 591 + if (fd < 0) 592 + _exit(23); 593 + 594 + if (write(fd, "test", 4) != 4) { 595 + close(fd); 596 + _exit(24); 597 + } 598 + 599 + close(fd); 600 + 601 + if (access("/testfile", F_OK)) 602 + _exit(25); 603 + 604 + _exit(0); 605 + } 606 + 607 + ASSERT_EQ(wait_for_pid(pid), 0); 608 + } 609 + 610 + /* 611 + * Tests below do not require UNSHARE_EMPTY_MNTNS support. 
612 + */ 613 + 614 + /* Invalid unshare flags return EINVAL */ 615 + TEST(invalid_flags) 616 + { 617 + pid_t pid; 618 + 619 + pid = fork(); 620 + ASSERT_GE(pid, 0); 621 + 622 + if (pid == 0) { 623 + if (enter_userns()) 624 + _exit(1); 625 + 626 + if (unshare(0x80000000) == 0) 627 + _exit(2); 628 + 629 + if (errno != EINVAL) 630 + _exit(3); 631 + 632 + _exit(0); 633 + } 634 + 635 + ASSERT_EQ(wait_for_pid(pid), 0); 636 + } 637 + 638 + /* Regular CLONE_NEWNS still copies the full mount tree */ 639 + TEST(clone_newns_full_copy) 640 + { 641 + pid_t pid; 642 + 643 + pid = fork(); 644 + ASSERT_GE(pid, 0); 645 + 646 + if (pid == 0) { 647 + ssize_t nr_mounts_before, nr_mounts_after; 648 + char tmpdir[] = "/tmp/empty_mntns_regr.XXXXXX"; 649 + int i; 650 + 651 + if (enter_userns()) 652 + _exit(1); 653 + 654 + if (unshare(CLONE_NEWNS)) 655 + _exit(2); 656 + 657 + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) 658 + _exit(3); 659 + 660 + if (!mkdtemp(tmpdir)) 661 + _exit(4); 662 + 663 + if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) 664 + _exit(5); 665 + 666 + for (i = 0; i < 3; i++) { 667 + char subdir[256]; 668 + 669 + snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i); 670 + if (mkdir(subdir, 0755) && errno != EEXIST) 671 + _exit(6); 672 + if (mount(subdir, subdir, NULL, MS_BIND, NULL)) 673 + _exit(7); 674 + } 675 + 676 + nr_mounts_before = count_mounts(); 677 + if (nr_mounts_before < 3) 678 + _exit(8); 679 + 680 + if (unshare(CLONE_NEWNS)) 681 + _exit(9); 682 + 683 + nr_mounts_after = count_mounts(); 684 + if (nr_mounts_after < nr_mounts_before) 685 + _exit(10); 686 + 687 + _exit(0); 688 + } 689 + 690 + ASSERT_EQ(wait_for_pid(pid), 0); 691 + } 692 + 693 + /* Other namespace unshares are unaffected */ 694 + TEST(other_ns_unaffected) 695 + { 696 + pid_t pid; 697 + 698 + pid = fork(); 699 + ASSERT_GE(pid, 0); 700 + 701 + if (pid == 0) { 702 + char hostname[256]; 703 + 704 + if (enter_userns()) 705 + _exit(1); 706 + 707 + if (unshare(CLONE_NEWUTS)) 708 + 
_exit(2); 709 + 710 + if (sethostname("test-empty-mntns", 16)) 711 + _exit(3); 712 + 713 + if (gethostname(hostname, sizeof(hostname))) 714 + _exit(4); 715 + 716 + if (strcmp(hostname, "test-empty-mntns") != 0) 717 + _exit(5); 718 + 719 + _exit(0); 720 + } 721 + 722 + ASSERT_EQ(wait_for_pid(pid), 0); 723 + } 724 + 725 + TEST_HARNESS_MAIN
+225
tools/testing/selftests/filesystems/empty_mntns/overmount_chroot_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Test: rootfs overmounted multiple times with chroot into topmost 4 + * 5 + * This test creates a scenario where: 6 + * 1. A new mount namespace is created with a tmpfs root (via pivot_root) 7 + * 2. A mountpoint is created and overmounted multiple times 8 + * 3. The caller chroots into the topmost mount layer 9 + * 10 + * The test verifies that: 11 + * - Multiple overmounts create separate mount layers 12 + * - Each layer's files are isolated 13 + * - chroot correctly sets the process's root to the topmost layer 14 + * - After chroot, only the topmost layer's files are visible 15 + * 16 + * Copyright (c) 2024 Christian Brauner <brauner@kernel.org> 17 + */ 18 + 19 + #define _GNU_SOURCE 20 + #include <fcntl.h> 21 + #include <linux/mount.h> 22 + #include <linux/stat.h> 23 + #include <sched.h> 24 + #include <stdio.h> 25 + #include <string.h> 26 + #include <sys/mount.h> 27 + #include <sys/stat.h> 28 + #include <sys/syscall.h> 29 + #include <sys/types.h> 30 + #include <sys/wait.h> 31 + #include <unistd.h> 32 + 33 + #include "../utils.h" 34 + #include "empty_mntns.h" 35 + #include "kselftest_harness.h" 36 + 37 + #define NR_OVERMOUNTS 5 38 + 39 + /* 40 + * Setup a proper root filesystem using pivot_root. 41 + * This ensures we own the root directory in our user namespace. 
42 + */ 43 + static int setup_root(void) 44 + { 45 + char tmpdir[] = "/tmp/overmount_test.XXXXXX"; 46 + char oldroot[256]; 47 + 48 + if (!mkdtemp(tmpdir)) 49 + return -1; 50 + 51 + /* Mount tmpfs at the temporary directory */ 52 + if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=10M")) 53 + return -1; 54 + 55 + /* Create directory for old root */ 56 + snprintf(oldroot, sizeof(oldroot), "%s/oldroot", tmpdir); 57 + if (mkdir(oldroot, 0755)) 58 + return -1; 59 + 60 + /* pivot_root to use the tmpfs as new root */ 61 + if (syscall(SYS_pivot_root, tmpdir, oldroot)) 62 + return -1; 63 + 64 + if (chdir("/")) 65 + return -1; 66 + 67 + /* Unmount old root */ 68 + if (umount2("/oldroot", MNT_DETACH)) 69 + return -1; 70 + 71 + /* Remove oldroot directory */ 72 + if (rmdir("/oldroot")) 73 + return -1; 74 + 75 + return 0; 76 + } 77 + 78 + /* 79 + * Test scenario: 80 + * 1. Enter a user namespace to gain CAP_SYS_ADMIN 81 + * 2. Create a new mount namespace 82 + * 3. Setup a tmpfs root via pivot_root 83 + * 4. Create a mountpoint /newroot and overmount it multiple times 84 + * 5. Create a marker file in each layer 85 + * 6. Chroot into /newroot (the topmost overmount) 86 + * 7. 
Verify we're in the topmost layer (only topmost marker visible) 87 + */ 88 + TEST(overmount_chroot) 89 + { 90 + pid_t pid; 91 + 92 + pid = fork(); 93 + ASSERT_GE(pid, 0); 94 + 95 + if (pid == 0) { 96 + ssize_t nr_mounts; 97 + uint64_t mnt_ids[NR_OVERMOUNTS + 1]; 98 + uint64_t root_id_before, root_id_after; 99 + struct statmount *sm; 100 + char marker[64]; 101 + int fd, i; 102 + 103 + /* Step 1: Enter user namespace for privileges */ 104 + if (enter_userns()) 105 + _exit(1); 106 + 107 + /* Step 2: Create a new mount namespace */ 108 + if (unshare(CLONE_NEWNS)) 109 + _exit(2); 110 + 111 + /* Step 3: Make the mount tree private */ 112 + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) 113 + _exit(3); 114 + 115 + /* Step 4: Setup a proper tmpfs root via pivot_root */ 116 + if (setup_root()) 117 + _exit(4); 118 + 119 + /* Create the base mount point for overmounting */ 120 + if (mkdir("/newroot", 0755)) 121 + _exit(5); 122 + 123 + /* Mount base tmpfs on /newroot */ 124 + if (mount("tmpfs", "/newroot", "tmpfs", 0, "size=1M")) 125 + _exit(6); 126 + 127 + /* Record base mount ID */ 128 + mnt_ids[0] = get_unique_mnt_id("/newroot"); 129 + if (!mnt_ids[0]) 130 + _exit(7); 131 + 132 + /* Create marker in base layer */ 133 + fd = open("/newroot/layer_0", O_CREAT | O_RDWR, 0644); 134 + if (fd < 0) 135 + _exit(8); 136 + if (write(fd, "layer_0", 7) != 7) { 137 + close(fd); 138 + _exit(9); 139 + } 140 + close(fd); 141 + 142 + /* Step 5: Overmount /newroot multiple times with tmpfs */ 143 + for (i = 0; i < NR_OVERMOUNTS; i++) { 144 + if (mount("tmpfs", "/newroot", "tmpfs", 0, "size=1M")) 145 + _exit(10); 146 + 147 + /* Record mount ID for this layer */ 148 + mnt_ids[i + 1] = get_unique_mnt_id("/newroot"); 149 + if (!mnt_ids[i + 1]) 150 + _exit(11); 151 + 152 + /* Create a marker file in each layer */ 153 + snprintf(marker, sizeof(marker), "/newroot/layer_%d", i + 1); 154 + fd = open(marker, O_CREAT | O_RDWR, 0644); 155 + if (fd < 0) 156 + _exit(12); 157 + 158 + if (write(fd, 
marker, strlen(marker)) != (ssize_t)strlen(marker)) { 159 + close(fd); 160 + _exit(13); 161 + } 162 + close(fd); 163 + } 164 + 165 + /* Verify mount count increased */ 166 + nr_mounts = count_mounts(); 167 + if (nr_mounts < NR_OVERMOUNTS + 2) 168 + _exit(14); 169 + 170 + /* Record root mount ID before chroot */ 171 + root_id_before = get_unique_mnt_id("/newroot"); 172 + 173 + /* Verify this is the topmost layer's mount */ 174 + if (root_id_before != mnt_ids[NR_OVERMOUNTS]) 175 + _exit(15); 176 + 177 + /* Step 6: Chroot into /newroot (the topmost overmount) */ 178 + if (chroot("/newroot")) 179 + _exit(16); 180 + 181 + /* Change to root directory within the chroot */ 182 + if (chdir("/")) 183 + _exit(17); 184 + 185 + /* Step 7: Verify we're in the topmost layer */ 186 + root_id_after = get_unique_mnt_id("/"); 187 + 188 + /* The mount ID should be the same as the topmost layer */ 189 + if (root_id_after != mnt_ids[NR_OVERMOUNTS]) 190 + _exit(18); 191 + 192 + /* Verify the topmost layer's marker file exists */ 193 + snprintf(marker, sizeof(marker), "/layer_%d", NR_OVERMOUNTS); 194 + if (access(marker, F_OK)) 195 + _exit(19); 196 + 197 + /* Verify we cannot see markers from lower layers (they're hidden) */ 198 + for (i = 0; i < NR_OVERMOUNTS; i++) { 199 + snprintf(marker, sizeof(marker), "/layer_%d", i); 200 + if (access(marker, F_OK) == 0) 201 + _exit(20); 202 + } 203 + 204 + /* Verify the root mount is tmpfs */ 205 + sm = statmount_alloc(root_id_after, 0, 206 + STATMOUNT_MNT_BASIC | STATMOUNT_MNT_ROOT | 207 + STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE); 208 + if (!sm) 209 + _exit(21); 210 + 211 + if (sm->mask & STATMOUNT_FS_TYPE) { 212 + if (strcmp(sm->str + sm->fs_type, "tmpfs") != 0) { 213 + free(sm); 214 + _exit(22); 215 + } 216 + } 217 + 218 + free(sm); 219 + _exit(0); 220 + } 221 + 222 + ASSERT_EQ(wait_for_pid(pid), 0); 223 + } 224 + 225 + TEST_HARNESS_MAIN
+2 -2
tools/testing/selftests/filesystems/utils.c
··· 158 158 _exit(0); 159 159 } 160 160 161 - static int wait_for_pid(pid_t pid) 161 + int wait_for_pid(pid_t pid) 162 162 { 163 163 int status, ret; 164 164 ··· 450 450 return fret; 451 451 } 452 452 453 - static int write_file(const char *path, const char *val) 453 + int write_file(const char *path, const char *val) 454 454 { 455 455 int fd = open(path, O_WRONLY); 456 456 size_t len = strlen(val);
+2
tools/testing/selftests/filesystems/utils.h
··· 44 44 return true; 45 45 } 46 46 47 + extern int wait_for_pid(pid_t pid); 48 + extern int write_file(const char *path, const char *val); 47 49 extern uint64_t get_unique_mnt_id(const char *path); 48 50 49 51 #endif /* __IDMAP_UTILS_H */